def normalise_image_size_and_bbox():
    """
    Resize every annotated training image to a square and rescale its box.

    The target side length is unpickled from ``MIN_DIM``. For each image
    named in the bounding-box JSON at ``BBOX_JSON_PATH`` the function

    1. reads the image from ``RAW_TRAIN_IMAGES_PATH`` and resizes it to
       ``dim x dim``, writing the result to ``NORMALISED_IMAGES_PATH``;
    2. scales both box corners by the per-axis resize factor and rounds
       them to integers;
    3. draws the scaled box and writes that picture to
       ``NORMALISED_BBOX_IMAGES_PATH``.

    The scaled boxes, stored as ``[x1, y1, x2, y2]`` per image name, are
    finally written to ``NORM_BBOX_JSON_PATH``.
    """
    create_dir(NORMALISED_IMAGES_PATH)
    create_dir(NORMALISED_BBOX_IMAGES_PATH)
    LOGGER.debug('Normalising image sizes to same size')
    # Context manager: the handle used to stay open for the whole run.
    with open(MIN_DIM, 'rb') as fp:
        dim = pickle.load(fp)
    LOGGER.debug('Normalising all images to %d x %d', dim, dim)
    bbox_json = read_json_file(BBOX_JSON_PATH)
    norm_bbox_json = {}
    for image, corners in bbox_json.items():
        img = cv2.imread(RAW_TRAIN_IMAGES_PATH + image)
        y_, x_ = img.shape[0], img.shape[1]
        img = cv2.resize(img, (dim, dim))
        # Saved before the rectangle is drawn, so this copy stays clean.
        cv2.imwrite(NORMALISED_IMAGES_PATH + image, img)
        x_scale = dim / x_
        y_scale = dim / y_
        tc_x = int(np.round(corners[0][0] * x_scale))
        tc_y = int(np.round(corners[0][1] * y_scale))
        bl_x = int(np.round(corners[1][0] * x_scale))
        bl_y = int(np.round(corners[1][1] * y_scale))
        norm_bbox_json[image] = [tc_x, tc_y, bl_x, bl_y]
        im_bbox = cv2.rectangle(img, (tc_x, tc_y), (bl_x, bl_y),
                                (255, 0, 0), 2)
        cv2.imwrite(NORMALISED_BBOX_IMAGES_PATH + image, im_bbox)
    write_json_to_file(norm_bbox_json, NORM_BBOX_JSON_PATH)
def store_parameter(product_number, tester, req_file, golden_file, dut_file,
                    output_dir):
    """
    Remember the run parameters at module level and prepare the result dir.

    Arguments:
    product_number - product number of DUT
    tester - tester of DB check
    req_file - requirement file
    golden_file - golden file
    dut_file - dut file
    output_dir - output directory; the current working directory is used
                 when this is empty

    A time-stamped ``result_<date>_<time>`` directory is created below the
    output directory and its path is stored in the module-level
    ``result_path``.
    """
    global g_product_number, g_tester
    global g_req_file, g_golden_file, g_dut_file
    global g_output_dir, result_path

    g_product_number, g_tester = product_number, tester
    g_req_file, g_golden_file, g_dut_file = req_file, golden_file, dut_file
    # Fall back to the working directory when no output dir was supplied.
    g_output_dir = output_dir or os.getcwd()

    stamp = time.strftime("_%Y%m%d_%H%M%S")
    result_path = os.path.join(g_output_dir, "result" + stamp)
    message = "result_path= %s" % result_path
    logger().info(message)
    uti.create_dir(result_path)
def model_inference(self):
    """
    Run the network on every test image and draw the surviving proposals.

    For each file in ``TEST_IMAGES_PATH``:
    1. resize the image to ``self.image_size`` and run a forward pass
    2. keep every anchor whose highest softmax probability exceeds 0.8
    3. add the predicted delta to that anchor and multiply by
       ``VGG_SCALE_SIZE`` to obtain a region
    4. convert the region to its diagonal corners
    5. apply non-max suppression and draw the kept boxes
    6. write the annotated image to ``TEST_OUTPUT_PATH``

    Boxes and scores are collected per image, so they always line up
    when they are handed to ``non_max_suppress``. Files that OpenCV
    cannot decode are skipped; images without any confident anchor are
    written without boxes.
    """
    abox = AnchorBox()
    create_dir(TEST_OUTPUT_PATH)
    image_list = os.listdir(TEST_IMAGES_PATH)
    # Loop invariants: build the softmax module and scale tensor once.
    softmax = Softmax(dim=0)
    scale = torch.tensor([VGG_SCALE_SIZE]).float()
    LOGGER.info('Inference begun')
    for image in image_list:
        path_of_image = TEST_IMAGES_PATH + '/' + image
        raw = cv2.imread(path_of_image)
        if raw is None:
            # cv2.imread returns None instead of raising on bad files.
            LOGGER.warning('Skipping %s: could not be read as an image',
                           path_of_image)
            continue
        resized = cv2.resize(raw, (self.image_size, self.image_size))
        # torch.tensor copies, so `resized` stays available for drawing.
        img = torch.tensor(resized).float().permute(2, 0, 1).unsqueeze(0)
        img = img.to(self.device)
        pred_cls, pred_reg = self.forward(img)
        pred_cls = pred_cls.detach().cpu()
        pred_reg = pred_reg.detach().cpu()

        predicted_diagonals = []
        scores = []
        for anchor_idx, cls_out in enumerate(pred_cls[0]):
            top_prob = torch.max(softmax(cls_out)).item()
            if top_prob > 0.8:
                scores.append(top_prob)
                delta_xywh = pred_reg[0][anchor_idx]
                anchor_xywh = abox.anchor_boxes[anchor_idx]
                proposed_region = torch.mul(
                    torch.add(delta_xywh, anchor_xywh), scale)
                diag = get_diagonal_from_mpwh(proposed_region.tolist())
                predicted_diagonals.append(
                    [diag[0][0], diag[0][1], diag[1][0], diag[1][1]])
        LOGGER.debug('Length of predicted_diagonals: %s',
                     str(len(predicted_diagonals)))

        if predicted_diagonals:
            boxes = torch.tensor(predicted_diagonals)
            keep, max_count = non_max_suppress(boxes, torch.tensor(scores))
            for idx in range(max_count):
                box = boxes[keep[idx].item()].tolist()
                # OpenCV needs plain integer pixel coordinates.
                x1, y1, x2, y2 = (int(round(value)) for value in box)
                cv2.rectangle(resized, (x1, y1), (x2, y2), (255, 0, 0), 2)
        else:
            LOGGER.info('No region above threshold for %s', image)
        cv2.imwrite(TEST_OUTPUT_PATH + image, resized)
def main():
    """
    Max Ent Model on fMRI data

    Averages the input over the hard-coded groups of brain areas,
    binarizes the result with the command-line threshold, and then runs
    the empirical statistics, the maximum-entropy probability
    calculation and the comparison plots, all written to the output
    directory.
    """
    args = parse_args()
    source_dir = args.input
    target_dir = args.output
    # NOTE: the command-line value is read but immediately replaced below.
    brain_areas = args.areas
    cutoff = args.threshold
    brain_areas = [
        [21, 3, 19, 24, 16, 20, 18, 17, 62, 40, 45, 46, 41, 49, 44, 48, 47,
         25],
        [36, 50, 15, 29],
        [51, 58, 34, 60, 56, 55, 14, 7, 10, 9],
        [2, 12, 8, 5, 13, 11, 4, 63, 53, 54, 57, 52, 61],
        [37, 39, 59, 38, 6, 26, 28, 27],
        [30, 35, 23, 22, 32, 31, 0, 1, 64, 42, 43, 33, 65],
    ]
    lambdas_dict = {
        'lambda1': -4.37389,
        'lambda2': -0.0181369,
        'lambda3': 0.0434916,
        'lambda4': 0.0138453,
        'lambda5': -0.182505,
        'lambda6': 0.0141578,
        'lambda7': 0.0448107,
        'lambda8': -0.143712,
        'lambda9': -0.0357837,
        'lambda10': -0.0992271,
        'lambda11': -0.0599827,
        'lambda12': -0.194269,
        'lambda13': -0.0807454,
        'lambda14': -0.0203112,
        'lambda15': 0.205356,
        'lambda16': -0.0932409,
        'lambda17': -0.286996,
        'lambda18': -0.0666362,
        'lambda19': 0.047215,
        'lambda20': -0.0219382,
        'lambda21': 0.0555467,
        'lambda22': -0.417749,
    }
    create_dir(target_dir)

    # Empirical side: average, binarize, count patterns.
    averaged = mean_over_areas(source_dir, brain_areas, target_dir)
    binary = binrize(averaged, cutoff)
    _unique, _count, patterns, count_norm = number_of_patterns(binary)
    entropy_of_distribution_data(count_norm, target_dir)
    sequence_count_plot(patterns, count_norm, target_dir)
    mean_activity = average_activity(binary, target_dir)
    empirical_pairwise_activation(binary, target_dir)
    pairwise_covar(binary, mean_activity, target_dir)

    # Model side: probabilities from the fitted lambdas.
    probabilities, probas_list = calculate_prob_max_ent(lambdas_dict,
                                                        target_dir)
    sequence_count_plot_model(patterns, count_norm, probabilities,
                              target_dir)
    entropy_model(probas_list, target_dir)
def preform_lle_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic connectivity of brain areas with performing
    a locally linear embedding returning its matrix.

    For every subject file and every time point the pairwise matrix
    ``cos(d)`` is built, where ``d`` is the absolute phase difference of
    two areas (replaced by ``2*pi - d`` when it exceeds ``pi``). Each
    matrix is saved under ``<output_path>/dFC`` and embedded into two
    components; the embedding error is written to a JSON file.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: LLE matrix, LLE matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    lle_components = np.zeros((n_subjects, t_phases, brain_areas * 2),
                              dtype=np.float64)
    # Loop invariant: the dFC directory only has to be created once.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)
    for n in tqdm(range(n_subjects)):
        # assumes convert_to_phases returns a 2-D (area, time) array
        phases = np.asarray(
            convert_to_phases(paths[n], output_path, brain_areas, t_phases,
                              n))
        for t in range(t_phases):
            # All (i, z) pairs at once via broadcasting; element-wise
            # this is the same arithmetic as an explicit double loop.
            column = phases[:brain_areas, t]
            diff = np.absolute(column[:, np.newaxis] - column[np.newaxis, :])
            dFC = np.cos(np.where(diff > np.pi, 2 * np.pi - diff,
                                  diff)).astype(np.float64)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
            lle, err = manifold.locally_linear_embedding(dFC,
                                                         n_neighbors=12,
                                                         n_components=2)
            with open(
                    os.path.join(output_path,
                                 'LLE_error_{}_{}'.format(n, t)),
                    'w') as output:
                json.dump(err, output)
            lle_components[n, t, :] = np.squeeze(lle.flatten())
    # save the LLE matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), lle_components)
    return lle_components, lle_components.shape
def main():
    """
    DFC features

    Splits the cluster labels by task using the ``starts`` JSON (task
    name mapped to a ``[start, stop]`` row range), copies every dFC
    matrix of a task into a folder named after its cluster label, then
    averages the matrices of each folder, saves the average and plots it.
    """
    args = parse_args()
    input_path = args.input
    output_path = args.output
    starts_json = args.starts
    clusters = args.clusters
    brain_areas = args.features
    # Load labels and starts json and divide labels by tasks into separate
    # folders
    labels = np.load(clusters)['arr_0']
    with open(starts_json) as s:
        starts = json.load(s)
    cluster_paths = set()
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    for key, values in tqdm(starts.items()):
        output_p = os.path.join(output_path, key)
        create_dir(os.path.join(output_p, 'dFC_out'))
        new_array = labels[values[0]: values[1], :]
        dFC_paths = return_paths_list(os.path.join(input_path, key, 'dFC'),
                                      output_path, '.npz')
        np.savez(
            os.path.join(output_p, 'dFC_out', 'labels_{}'.format(key)),
            new_array)
        # Tasks labels divide dFCs according to labels into states folders
        for position, dfc_file in enumerate(dFC_paths):
            # The last column of the label row holds the cluster id.
            cluster = new_array[position][-1]
            cluster_output = os.path.join(output_p, 'dFC_out', str(cluster))
            if cluster_output not in cluster_paths:
                create_dir(cluster_output)
                cluster_paths.add(cluster_output)
            shutil.copyfile(
                dfc_file,
                os.path.join(cluster_output, os.path.basename(dfc_file)))
    # Average for clusters and visualise
    for c in tqdm(sorted(cluster_paths)):
        # NOTE(review): a re-run would also pick up a previously saved
        # 'averaged_dfc.npz' from this folder - confirm this is intended.
        matrix_paths = return_paths_list(c, output_path, '.npz')
        n_matrix = len(matrix_paths)
        avg_dfc = np.zeros((n_matrix, brain_areas, brain_areas),
                           dtype=np.float64)
        for i, matrix_path in enumerate(matrix_paths):
            avg_dfc[i, :, :] = np.load(matrix_path)['arr_0']
        # Average over all matrices in a cluster
        averaged = np.average(avg_dfc, 0)
        np.savez(os.path.join(c, 'averaged_dfc'), averaged)
        plot_dfc_areas_correlation(averaged, c)
def get_img_bbox_coors(limit=9999999999999999999):
    """
    Build the image-name -> bounding-box map and write it to disk.

    Walks over ``IMAGES`` and, for every entry whose file name is in
    ``IMAGES_IN_DIR``, looks up the two box corners by annotation id.
    Processing stops once ``limit`` images have been recorded; the map is
    then written to ``BBOX_JSON_PATH``.
    """
    create_dir(BBOX_IMAGES_PATH)
    LOGGER.debug('Getting image map with coordinates for Bounding Box')
    json_processed = dict()
    processed = 0
    for entry in IMAGES:
        file_name = entry['file_name']
        if file_name not in IMAGES_IN_DIR:
            continue
        corners = get_coors_from_annotation_by_id(entry['id'])
        tc_coor, bl_coor = corners
        json_processed[file_name] = [tc_coor, bl_coor]
        print(json_processed)
        processed += 1
        if processed >= limit:
            break
    write_json_to_file(json_processed, BBOX_JSON_PATH)
def main(args):
    """
    Parse every raw data file listed in the config into a CSV file.

    The output directory is ``<config.dir.data>/<config.name>``; each
    entry of ``config['data']`` is parsed from its ``file`` into
    ``<name>.csv``. An entry without an ``old_format`` key is parsed
    with that flag set to ``False``.
    """
    print('Reading config')
    config = utilities.read_json_config(args.config, utilities.Task.parse)

    print('Starting parsing...')
    output_dir = '{}/{}'.format(config['dir']['data'], config['name'])
    print('Creating data directory {}'.format(output_dir))
    utilities.create_dir(output_dir)

    print('Reading raw data...')
    progress = tqdm(range(len(config['data'])))
    for position in progress:
        entry = config['data'][position]
        source = entry['file']
        target = '{}/{}.csv'.format(output_dir, entry['name'])
        progress.set_description(
            'Processing raw_data in={} out={}'.format(source, target))
        if 'old_format' in entry:
            legacy = entry['old_format']
        else:
            legacy = False
        parse(source, target, legacy)
def save_report():
    """
    Save the report document.

    The result directory is created when it is missing, then the
    module-level ``document`` is saved there as ``DbCheckReport.docx``.
    When the file cannot be written because of a ``PermissionError`` the
    error is logged and the process exits with status 1.
    """
    global result_path
    global report_file

    if not os.path.isdir(result_path):
        uti.create_dir(result_path)

    report_file = os.path.join(result_path, "DbCheckReport.docx")
    try:
        document.save(report_file)
    except PermissionError:
        logger().exception('Report file has been opened by another program.')
        os.sys.exit(1)

    # Only reached after a successful save: the handler above exits.
    saved_as = os.path.abspath(report_file)
    logger().info("%s is saved successfully." % saved_as)
def main(args):
    """
    Train, test and export the predictors described by the config.

    Output goes to ``<config.dir.output>/<config.name>/train/<type>``
    with ``cdf``, ``gnuplot``, ``plot`` and ``model`` sub-directories.
    For each trained predictor the diff is generated and saved, a plot
    is created and the model is stored as ``<predictor>.joblib``.
    """
    print('Reading config...')
    config = utilities.read_json_config(args.config, utilities.Task.train)
    train_type = str(args.type)

    print('Preparing output directory...')
    output_dir = '{}/{}/train/{}'.format(config['dir']['output'],
                                         config['name'], train_type)
    utilities.create_dir(output_dir)

    print('Preparing dataset...')
    dataset = prepare_dataset(config, train_type, get_data_col(args.type))

    print('Preparing trainers...')
    trainers = prepare_trainer(config)
    print('There are {} models that needs to be trained'.format(
        len(trainers)))
    print()

    print('Training predictors...')
    predictors = train_predictor(config, trainers, dataset)
    print(predictors)
    print()

    print('Test predictors...')
    # The result is not used afterwards; the call itself is kept.
    tests = test_predictor(predictors, dataset)

    print('Preparing other output dirs')
    cdf_dir, gnuplot_dir, plot_dir, model_dir = [
        '{}/{}'.format(output_dir, leaf)
        for leaf in ('cdf', 'gnuplot', 'plot', 'model')
    ]
    for extra_dir in (cdf_dir, gnuplot_dir, plot_dir, model_dir):
        utilities.create_dir(extra_dir)

    print('Generate diff and plots...')
    progress = tqdm(predictors)
    for predictor in progress:
        progress.set_description('Generate diffs for {}'.format(predictor))
        diff = generate_diff(config, predictors, predictor, dataset)

        progress.set_description('Saving diffs for {}'.format(predictor))
        sorted_indexes = save_diff(config, cdf_dir, predictor, diff)

        progress.set_description('Creating plot for {}'.format(predictor))
        save_plot(config, cdf_dir, gnuplot_dir, plot_dir, predictor, diff,
                  sorted_indexes)

        progress.set_description('Saving model for {}'.format(predictor))
        model_file = '{}/{}.joblib'.format(model_dir, predictor)
        utilities.save(model_file, predictors[predictor])
def dynamic_functional_connectivity(input_path, output_path, brain_areas,
                                    pattern):
    """
    Computes the dynamic functional connectivity of brain areas.

    For every subject file and every time point the pairwise matrix
    ``cos(d)`` is built, where ``d`` is the absolute phase difference of
    two areas (replaced by ``2*pi - d`` when it exceeds ``pi``), and
    saved as ``subject_<n>_time_<t>.npz`` under ``<output_path>/dFC``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: dFC output path
    :rtype: str
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    # Defined before the loops so the return value exists even when
    # there is no time point to process, and the directory is created once.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)
    for n in tqdm(range(n_subjects)):
        # assumes convert_to_phases returns a 2-D (area, time) array
        phases = np.asarray(
            convert_to_phases(paths[n], output_path, brain_areas, t_phases,
                              n))
        for t in range(t_phases):
            # All (i, z) pairs at once via broadcasting; element-wise
            # this is the same arithmetic as an explicit double loop.
            column = phases[:brain_areas, t]
            diff = np.absolute(column[:, np.newaxis] - column[np.newaxis, :])
            dFC = np.cos(np.where(diff > np.pi, 2 * np.pi - diff,
                                  diff)).astype(np.float64)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
    return dfc_output
def model_inference(self):
    """
    Predict one bounding box per test image and save the annotated image.

    Every file in ``TEST_IMAGES_PATH`` is resized to ``self.image_size``,
    passed through the network, and the predicted midpoint, width and
    height are converted to diagonal corners with
    ``get_diagonal_from_mpwh``. The box is drawn on the resized image,
    which is written to ``TEST_OUTPUT_PATH``. Files OpenCV cannot decode
    are skipped with a warning.
    """
    create_dir(TEST_OUTPUT_PATH)
    image_list = os.listdir(TEST_IMAGES_PATH)
    LOGGER.info('Inference begun')
    for image in image_list:
        path_of_image = TEST_IMAGES_PATH + '/' + image
        raw = cv2.imread(path_of_image)
        if raw is None:
            # cv2.imread returns None instead of raising on bad files.
            LOGGER.warning('Skipping %s: could not be read as an image',
                           path_of_image)
            continue
        resized = cv2.resize(raw, (self.image_size, self.image_size))
        # torch.tensor copies the pixels, so `resized` can be reused for
        # drawing instead of reading and resizing the file a second time.
        img = torch.tensor(resized).float().permute(2, 0, 1).unsqueeze(0)
        img = img.to(self.device)
        predicted_width, predicted_height, predicted_midpoint = self.forward(
            img)
        mp_x = predicted_midpoint[0][0].detach().cpu().item()
        mp_y = predicted_midpoint[0][1].detach().cpu().item()
        w = predicted_width[0].detach().cpu().item()
        h = predicted_height[0].detach().cpu().item()
        diag = get_diagonal_from_mpwh([mp_x, mp_y, w, h])
        im_bbox = cv2.rectangle(resized, diag[0], diag[1], (255, 0, 0), 2)
        cv2.imwrite(TEST_OUTPUT_PATH + image, im_bbox)
def save(self):
    """
    Write this element to the report result directory and return its path.

    A table element is written as comma-separated rows to
    ``<title>_<timestamp>.csv``; anything else saves the current
    matplotlib figure as ``<title>_<timestamp>.png``. ``/`` and ``:`` in
    the title are replaced so it can be used as a file name. The path is
    also stored in ``self.ref``.
    """
    # Maybe delete previous ref file
    if not os.path.isdir(report.result_path):
        uti.create_dir(report.result_path)
    suffix = time.strftime("%Y%m%d_%H%M%S")
    safe_title = self.title.replace('/', '_').replace(':', '-')
    is_table = self.etype == self.TABLE
    extension = 'csv' if is_table else 'png'
    # os.path.join instead of a literal backslash keeps the path valid
    # on every platform.
    filename = os.path.abspath(
        os.path.join(report.result_path,
                     '{}_{}.{}'.format(safe_title, suffix, extension)))
    if is_table:
        with open(filename, "w") as out:
            for row in self.t_data:
                out.write(','.join(row) + "\n")
    else:
        plt.gcf().savefig(filename)
    logger().debug("Figure of command(%s) is saved as:%s" %
                   (self.title, filename))
    self.ref = filename
    return filename
def main(args):
    """
    Evaluate the stored predictors on every configured dataset.

    Output goes to ``<config.dir.output>/<config.name>/inference`` with
    ``cdf``, ``gnuplot`` and ``plot`` sub-directories. For each dataset
    the predictors are tested, the diff is generated and saved and a
    plot is created; a combined plot is produced at the end.
    """
    print('Reading config...')
    config = utilities.read_json_config(args.config,
                                        utilities.Task.inference)

    print('Preparing output directory...')
    output_dir = '{}/{}/inference'.format(config['dir']['output'],
                                          config['name'])
    utilities.create_dir(output_dir)

    print('Preparing dataset...')
    datasets = prepare_dataset(config, COMBINED_COL)

    print('Preparing predictors...')
    # Loaded in the fixed order main, stringtable, prune, otyrt.
    predictors = tuple(
        utilities.load(config['model'][role]['file'])
        for role in ('main', 'stringtable', 'prune', 'otyrt'))

    print('Preparing other output dirs')
    cdf_dir, gnuplot_dir, plot_dir = [
        '{}/{}'.format(output_dir, leaf)
        for leaf in ('cdf', 'gnuplot', 'plot')
    ]
    for extra_dir in (cdf_dir, gnuplot_dir, plot_dir):
        utilities.create_dir(extra_dir)

    progress = tqdm(range(len(datasets)))
    for idx in progress:
        data_cfg = config['data'][idx]
        name = data_cfg['name']
        samples = datasets[idx]

        progress.set_description(
            'Outputting performance metrics for dataset {}'.format(name))
        # The metrics are not used afterwards; the call itself is kept.
        _mse, _r2 = test_predictor(samples, *predictors)

        progress.set_description(
            'Generating diffs for dataset {}'.format(name))
        diff = generate_diff(samples, *predictors)

        progress.set_description(
            'Saving diffs for dataset {} prediction'.format(name))
        sorted_indexes = save_diff(config, cdf_dir, name, diff)

        progress.set_description(
            'Creating plot for database {} prediction'.format(name))
        save_plot(config['model'], data_cfg, cdf_dir, gnuplot_dir, plot_dir,
                  diff, sorted_indexes)

    print('Saving combined plot')
    save_plots(config, cdf_dir, gnuplot_dir, plot_dir)
def thumbnail(session, db_object, db_manager, parent=None):
    """
    Download the thumbnail of ``db_object`` and record its local path.

    The session is flushed first. Objects without an id are left alone.
    The target is ``<download_path>/<title>-<id>/thumbnail.jpg``, nested
    below ``<parent.title>-<parent.id>`` when a parent is given. When
    the target is reported missing, its directory is created and the
    image is downloaded. The path is stored in
    ``db_object.thumbnail_local`` and committed; a failing commit is
    logged and rolled back.
    """
    db_manager.flush(session)
    if not db_object.id:
        return

    own_folder = '{}-{}'.format(db_object.title, db_object.id)
    if parent:
        parent_folder = '{}-{}'.format(parent.title, parent.id)
        folders = [parent_folder, own_folder]
    else:
        folders = [own_folder]
    thumbnail_path = '/'.join(
        [str(CronJobManager.download_path)] + folders + ['thumbnail.jpg'])

    if not utils.dir_exists(thumbnail_path):
        target_dir = thumbnail_path.rsplit('/', 1)[0]
        utils.create_dir(target_dir)
        CronJobManager.__instance.dmm_ripper.download_image(
            db_object.thumbnail_dmm, thumbnail_path)
        CronJobManager.logger.info('Storing thubnail in %s', thumbnail_path)

    db_object.thumbnail_local = thumbnail_path
    try:
        db_manager.commit(session)
    except Exception as e:
        CronJobManager.logger.exception(
            'Error updating local ' + 'thubnail\'s path for: %s',
            db_object.title)
        db_manager.rollback(session)
def run(self):
    """
    Validate the runner settings and execute the tests of the runlist.

    Raises:
        RunfileError: the runlist file does not exist.
        RunnerError: the target file is missing or invalid, the bat type
            is not in ``self.bat_list``, or ``set_number`` is missing
            for a bat type other than ``'bvt'``.
    """
    if self.runlist:
        self.use_runlist = self.runlist
    else:
        self.default_runlist()
    # check if user provided runlist exists or not
    if not utilities.is_file_valid(self.use_runlist):
        raise RunfileError("runlist %s does not exist" % self.use_runlist)
    # check if targetfile exist or not
    if (not self.targetfile) or \
            (not utilities.is_file_valid(self.targetfile)):
        raise RunnerError("Target file %s is not passed or does not exist"
                          % (self.targetfile))
    # check that the bat type is one of the supported ones; the value is
    # now actually formatted into the message
    if self.bat not in self.bat_list:
        raise RunnerError("Invalid bat type %s, provide bvt, ceph_qa"
                          % (self.bat,))
    # check if set_number is provided, not mandatory for bvt
    if self.bat != 'bvt':
        if not self.set_number:
            raise RunnerError("Please pass set_number with -s option")
    # create log folder if not existing
    self.log_dir_inuse = self.default_log_path_dict.get(self.bat)
    if not utilities.is_path(self.log_dir_inuse):
        utilities.create_dir(self.log_dir_inuse)
    # start executing the test present in the runlist
    obj = teuthology.Teuthology(targetfile=self.targetfile,
                                bat_type=self.bat,
                                set_number=self.set_number,
                                report_result=self.report_result,
                                runlist_path=self.use_runlist,
                                log_path=self.log_dir_inuse,
                                nuke=self.debug,
                                mail=self.mail,
                                no_poweroff=self.no_poweroff)
    obj.run()
def model_train(self, epoch_offset=0, lamda=10, nreg=2400, ncls=256):
    """
    Train the network on the normalised images, one image per step.

    For every image the classification loss (NLL over the selected
    foreground and background anchors) and the regression loss
    (smooth L1 over the foreground anchors) are combined as
    ``cls_loss / ncls + reg_loss * lamda / nreg``.

    :param epoch_offset: added to the epoch number in log messages and
        checkpoint file names only; the loop always runs ``EPOCHS`` times
    :param lamda: weight of the regression loss
    :param nreg: divisor of the regression loss
    :param ncls: divisor of the classification loss
    """
    LOGGER.info('Started Training with an offset of %s', str(epoch_offset))
    create_dir(MODEL_SAVE_PATH)
    optimizer = SGD(self.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
    scheduler = StepLR(optimizer,
                       step_size=SCHEDULER_STEP,
                       gamma=SCHEDULER_GAMMA)
    LOGGER.info(
        'Learning Rate: %s, Momentum: %s, Scheduler_step: %s, scheduler_gamma: %s',
        str(LEARNING_RATE), str(MOMENTUM), str(SCHEDULER_STEP),
        str(SCHEDULER_GAMMA))
    loss_for_classification = NLLLoss()
    loss_for_regression = SmoothL1Loss()
    img_coors_json = read_json_file(BBOX_XYWH_JSON_PATH)
    anchor_box = AnchorBox()
    # Anchor assignments are computed once per image, up front, and then
    # looked up by position inside the epoch loop.
    all_background_index = []
    all_foreground_index = []
    all_reg_tensor = []
    for image, coors in img_coors_json.items():
        li_fore_index, li_back_index, reg_ten_actual = \
            anchor_box.calculate_p_for_each_anchor_box(anchor_box.anchor_boxes, coors)
        all_background_index.append(li_back_index)
        all_foreground_index.append(li_fore_index)
        all_reg_tensor.append(reg_ten_actual)
    for epoch in range(EPOCHS):
        epoch_loss = 0.0
        scheduler.step(epoch)
        LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                     str(epoch + epoch_offset), str(scheduler.get_lr()))
        # Position of the current image in the precomputed lists; relies
        # on the JSON dict being iterated in the same order as above.
        count = 0
        for image, coors in img_coors_json.items():
            img = cv2.imread(NORMALISED_IMAGES_PATH + image)
            img = torch.tensor(img).float().permute(2, 0, 1).unsqueeze(0)
            img = img.to(self.device)
            pred_cls, pred_reg = self.forward(img)
            li_foreground_index = all_foreground_index[count]
            li_background_index = all_background_index[count]
            reg_tensor_actual = all_reg_tensor[count]
            count = count + 1
            # Expected class per selected anchor: 1 foreground, 0 background.
            exp_torch_fg_bg = []
            # Each accumulator starts with one zero row so torch.cat has
            # something to append to; that row is sliced off afterwards.
            pred_torch_fg = torch.zeros(1, pred_cls.shape[2])
            pred_torch_fg = pred_torch_fg.to(self.device)
            pred_torch_reg = torch.zeros(1, pred_reg.shape[2])
            pred_torch_reg = pred_torch_reg.to(self.device)
            for idx_foreground in li_foreground_index:
                exp_torch_fg_bg.append(1)
                pred_torch_fg = torch.cat(
                    (pred_torch_fg, pred_cls[0][idx_foreground].unsqueeze(0)),
                    dim=0)
                pred_torch_reg = torch.cat(
                    (pred_torch_reg,
                     pred_reg[0][idx_foreground].unsqueeze(0)),
                    dim=0)
            pred_torch_fg = pred_torch_fg[1:]
            pred_torch_reg = pred_torch_reg[1:]
            pred_torch_bg = torch.zeros(1, pred_cls.shape[2])
            pred_torch_bg = pred_torch_bg.to(self.device)
            for idx_background in li_background_index:
                exp_torch_fg_bg.append(0)
                pred_torch_bg = torch.cat(
                    (pred_torch_bg, pred_cls[0][idx_background].unsqueeze(0)),
                    dim=0)
            pred_torch_bg = pred_torch_bg[1:]
            # Foreground rows first, then background rows, matching the
            # order of the labels in exp_torch_fg_bg.
            pred_cls_only_background_foreground = torch.cat(
                (pred_torch_fg, pred_torch_bg), dim=0)
            # NLLLoss is fed log-probabilities here.
            pred_cls_only_background_foreground = LogSoftmax(dim=1).\
                forward(pred_cls_only_background_foreground)
            exp_torch_fg_bg = torch.tensor(exp_torch_fg_bg)
            exp_torch_fg_bg = exp_torch_fg_bg.to(self.device)
            pred_cls_only_background_foreground = pred_cls_only_background_foreground.to(
                self.device)
            reg_tensor_actual = reg_tensor_actual.to(self.device)
            pred_torch_reg = pred_torch_reg.to(self.device)
            cls_loss = loss_for_classification(
                pred_cls_only_background_foreground, exp_torch_fg_bg)
            reg_loss = loss_for_regression(reg_tensor_actual, pred_torch_reg)
            total_image_loss = (cls_loss / ncls) + (reg_loss * lamda / nreg)
            total_image_loss = total_image_loss.to(self.device)
            optimizer.zero_grad()
            total_image_loss.backward()
            optimizer.step()
            epoch_loss = epoch_loss + total_image_loss.item()
        LOGGER.debug('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                     str(epoch_loss))
        # Periodic checkpoint, named after the offset epoch number.
        if epoch % EPOCH_SAVE_INTERVAL == 0:
            torch.save(
                self.state_dict(), MODEL_SAVE_PATH + 'model_epc_' +
                str(epoch + epoch_offset) + '.pt')
        if epoch % 5 == 0:
            LOGGER.info('Loss at Epoch %s: %s', str(epoch + epoch_offset),
                        str(epoch_loss))
def model_train(self, epoch_offset=0):
    """
    Train the box regressor on the normalised images, one image per step.

    The loss of a step is the MSE of the predicted height plus the MSE
    of the predicted width plus half the MSE of the predicted midpoint.
    Gradients are clipped to a norm of 0.5. A checkpoint is written
    every ``EPOCH_SAVE_INTERVAL`` epochs and once more after the last
    epoch.

    :param epoch_offset: added to the epoch number in the debug log and
        in checkpoint file names only
    """
    create_dir(MODEL_SAVE_PATH)
    mse = MSELoss()
    annotations = read_json_file(BBOX_XYWH_JSON_PATH)
    optimizer = RMSprop(self.parameters(),
                        lr=LEARNING_RATE,
                        momentum=MOMENTUM)
    scheduler = StepLR(optimizer,
                       step_size=SCHEDULER_STEP,
                       gamma=SCHEDULER_GAMMA)

    for epoch in range(EPOCHS):
        running_loss = 0.0
        scheduler.step(epoch)
        LOGGER.debug('Epoch: %s, Current Learning Rate: %s',
                     str(epoch + epoch_offset), str(scheduler.get_lr()))

        for image, coors in annotations.items():
            # '%' in the stored name is replaced with '_' in the path.
            image_path = (NORMALISED_IMAGES_PATH + image).replace('%', '_')
            pixels = cv2.imread(image_path)
            batch = torch.tensor(pixels).float().permute(2, 0, 1)
            batch = batch.unsqueeze(0).to(self.device)
            predicted_width, predicted_height, predicted_midpoint = \
                self.forward(batch)

            # all are scaled
            target = coors[0]
            mid_x = torch.tensor([[target[0]]]).to(self.device)
            mid_y = torch.tensor([[target[1]]]).to(self.device)
            midpoint = torch.cat((mid_x, mid_y), dim=1).float()
            width = torch.tensor([[target[2]]]).float().to(self.device)
            height = torch.tensor([[target[3]]]).float().to(self.device)

            height_loss = mse(predicted_height, height)
            width_loss = mse(predicted_width, width)
            midpoint_loss = mse(predicted_midpoint, midpoint.to(self.device))
            loss = height_loss + width_loss + midpoint_loss / 2

            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(self.parameters(), 0.5)
            optimizer.step()
            running_loss = running_loss + loss.item()

        if epoch % 5 == 0:
            print('epoch: ' + str(epoch) + ' ' + 'loss: ' +
                  str(running_loss))
        if epoch % EPOCH_SAVE_INTERVAL == 0:
            print('saving')
            checkpoint = (MODEL_SAVE_PATH + 'model_epc_' +
                          str(epoch + epoch_offset) + '.pt')
            torch.save(self.state_dict(), checkpoint)

    # Final checkpoint for the last epoch that ran.
    torch.save(
        self.state_dict(),
        MODEL_SAVE_PATH + 'model_epc_' + str(epoch + epoch_offset) + '.pt')
from plotting import plot_history
from settings import DOGS_VS_CATS_DIR, CONV_NETS_DIR, DOGS_VS_CATS_WEIGHTS_DIR
from utilities import create_dir

# Dataset split sizes and training settings.
N_TRAIN_IMAGES = 3000
N_VALIDATION_IMAGES = 500
N_TEST_IMAGES = 1000
BATCH_SIZE = 64
EPOCHS = 10
USE_GENERATOR = True

original_dataset_dir = DOGS_VS_CATS_DIR
copying_dataset_dir = osp.join(DOGS_VS_CATS_DIR, 'train')

# Layout of the reduced dataset: <base>/<split>/<class>.
base_dir = osp.join(DOGS_VS_CATS_DIR, 'small_version')
train_dir = osp.join(base_dir, 'train')
validation_dir = osp.join(base_dir, 'validation')
test_dir = osp.join(base_dir, 'test')
train_cats_dir = osp.join(train_dir, 'cats')
train_dogs_dir = osp.join(train_dir, 'dogs')
validation_cats_dir = osp.join(validation_dir, 'cats')

# Parents are listed before their children, so they are created first.
for _directory in (base_dir, train_dir, validation_dir, test_dir,
                   train_cats_dir, train_dogs_dir, validation_cats_dir):
    create_dir(_directory)
np.random.seed(args.seed) ## 2. load the data train_x, val_x, test_x, train_y, val_y, test_y, noisy_S = am_util.load_data( args.data_dir) [train_num, feature_dim] = train_x.shape [train_num, output_dim] = train_y.shape batch_num = int(train_num / args.batch_size) ## 3. fix seed for graph and numpy operations tf.reset_default_graph() tf.set_random_seed(args.seed) ## 4.create output folders to save the log files and trained model ## ./output/model_name output_dir = args.output_dir + args.model_name am_util.create_dir(output_dir) ## ./output/model_name/logs/ am_util.create_dir(os.path.join(output_dir, "logs")) ## ./output/model_name/models/ am_util.create_dir(os.path.join(output_dir, "models")) ## 5.define the full graph class NNModel(object): def __init__(self, sess, args): ''' the main neural network model class ''' #self.config = vars(args) self.x = tf.placeholder(tf.float32, [None, feature_dim], name="input") self.y_ = tf.placeholder(tf.float32, [None, output_dim], name="output") self.is_training = tf.placeholder(tf.bool) ## for the augmented data
def request_download_book(self, bot, update, book, user, user_data,
                          inline_message_id):
    """
    Handle a user's request to download a book.

    The download button of the inline message is removed first. When all
    pages and the table of contents are already stored locally, the book
    is delivered in the user's preferred format: a file at or above
    ``CronJobManager.max_upload_size`` goes through a storage URL, a
    smaller one is sent as a document, and a missing one triggers a
    conversion subscription. Otherwise the missing pages are downloaded,
    after asking for the password when the user does not store
    credentials.

    :return: ``self.PROCESS_PASSWORD`` when a password was requested,
        otherwise ``ConversationHandler.END``
    """
    session = self.db_manager.create_session()
    session.add(book)
    book_path = utils.get_book_download_path(self.download_path, book)
    if not utils.dir_exists(book_path):
        utils.create_dir(book_path)
    book_images = utils.get_book_page_num_list(book_path)
    missing_images = utils.book_missing_pages(1, book.pages, book_images)
    is_toc_missing = not utils.dir_exists(path.join(book_path, 'toc.txt'))
    self.logger.info('User %s requested to download book %s', user.id,
                     book.id)
    self.logger.info('Removing download button of inline query of ' \
        + 'book %s for user %s', book.id, user.id)
    bot.editMessageReplyMarkup(
        chat_id=None,
        inline_message_id=inline_message_id,
        reply_markup=None
    )
    if not missing_images and not is_toc_missing:
        self.logger.info('All the book %s pages are available in local ' \
            + 'storage', book.id)
        # NOTE(review): a file_format outside these three leaves
        # preferred_format unbound - confirm no other format can occur.
        if user.file_format == FileFormat.pdf:
            preferred_format = FileFormat(FileFormat.pdf).name
        elif user.file_format == FileFormat.epub:
            preferred_format = FileFormat(FileFormat.epub).name
        elif user.file_format == FileFormat.zip:
            preferred_format = FileFormat(FileFormat.zip).name
        file_format_path = utils.get_book_by_format(
            book_path,
            '.{}'.format(preferred_format.lower())
        )
        if file_format_path:
            if path.getsize(file_format_path) \
                    >= CronJobManager.max_upload_size:
                bot.send_message(
                    chat_id=user.id,
                    text=self.lang[user.language_code]['generate_url']
                )
                self.scheduler.generante_storage_url(
                    file_format_path,
                    preferred_format,
                    bot,
                    user
                )
            else:
                self.logger.info('Sending %s book transmission start ' \
                    + 'message to user %s', book.id, user.id)
                bot.send_message(
                    chat_id=user.id,
                    text=self.lang[user.language_code]['sending_book'] \
                        .format(book.title)
                )
                self.logger.info('Sending book %s in %s format to ' \
                    + 'user %s', book.id, preferred_format, user.id)
                # Context manager so the file handle is closed once the
                # upload call returns (or raises).
                with open(file_format_path, 'rb') as book_file:
                    bot.send_document(
                        chat_id=user.id,
                        document=book_file,
                        timeout=60
                    )
        else:
            self.logger.info('%s book not available in %s format',
                             book.id, preferred_format)
            self.scheduler.subscribe_to_book_conversion(
                book, book_path, user, bot, from_download=False)
        return ConversationHandler.END
    else:
        if not user.save_credentials:
            # Keep what the password step needs to resume the download.
            user_data['book'] = book
            user_data['book_path'] = book_path
            user_data['missing_images'] = missing_images
            user_data['is_toc_missing'] = is_toc_missing
            user_data['user'] = user
            self.logger.info('sending user %s password request message.',
                             user.id)
            bot.send_message(
                user.id,
                self.lang[user.language_code]['request_password']
            )
            return self.PROCESS_PASSWORD
        else:
            self.download_pages(
                bot,
                update,
                book_path,
                missing_images,
                is_toc_missing,
                book,
                user
            )
            return ConversationHandler.END
def main():
    """
    Dynamic functional connectivity states clustering

    For every input path a dedicated output folder is created and,
    depending on the flags, PCA and/or LLE components are computed and
    plotted, or the raw dFC matrices are produced for the autoencoder.
    Afterwards the data is clustered: k-means over the concatenated
    components or over the encoded data, DBSCAN, or a separate k-means
    followed by a state line plot.
    """
    args = parse_args()
    input_paths, pattern = args.input, args.pattern
    output_path = os.path.normpath(args.output)
    brain_areas = args.areas
    pca, lle = args.pca, args.lle
    clusters = args.clusters
    t_phases = args.phases
    db, autoen = args.db, args.autoen

    create_dir(output_path)
    component_files = []
    dfc_paths = []
    for input_path in input_paths:
        subject_dir = create_new_output_path(input_path, output_path)
        create_dir(subject_dir)
        component_files.append(
            os.path.join(subject_dir, 'components_matrix.npz'))
        if pca:
            components, _ = preform_pca_on_dynamic_connectivity(
                input_path, subject_dir, brain_areas, pattern)
            fcd_matrix = functional_connectivity_dynamics(
                components, subject_dir)
            plot_functional_connectivity_matrix(fcd_matrix, subject_dir)
        if lle:
            components, _ = preform_lle_on_dynamic_connectivity(
                input_path, subject_dir, brain_areas, pattern)
            fcd_matrix = functional_connectivity_dynamics(
                components, subject_dir)
            # This plot goes to the top-level output directory.
            plot_functional_connectivity_matrix(fcd_matrix, output_path)
        if autoen:
            dfc_paths.append(
                dynamic_functional_connectivity(input_path, subject_dir,
                                                brain_areas, pattern))

    if autoen:
        dfc_all = preprocess_autoencoder(dfc_paths, output_path,
                                         brain_areas)
        encoded = autoencoder(dfc_all, output_path)

    wants_kmeans_score = clusters is not None
    if wants_kmeans_score and autoen is False:
        # concatenate all data
        merged = convert_components(component_files, output_path)
        kmeans_clustering_mean_score(merged, output_path, clusters)
    elif wants_kmeans_score and autoen is True:
        kmeans_clustering_mean_score(encoded, output_path, clusters)
    elif db:
        merged = convert_components(component_files, output_path)
        dbscan(merged, output_path)
    else:
        # perform clustering on data separately
        clusters = kmeans_clustering(components, output_path)
        plot_states_line(clusters, t_phases, output_path)
from utilities import create_dir
from utilities import download_master_file
from utilities import read_master_file
import pandas as pd
import requests

http = 'http://'
gz = '.fastq.gz'
xml = '.xml'

# NOTE(review): from the way it is used below, `folders` appears to hold
# the master-file path [0], the log-file path [1], and the fastq [2] and
# xml [3] path prefixes - confirm against utilities.create_dir.
folders = create_dir('PRJEB11419.txt', 'AG')
download_master_file('https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJEB11419&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt', folders[0])
download_lists = read_master_file(folders[0], '\t')

# download fastq and metadata
for i, accession in enumerate(download_lists[1]):
    total_link = http + str(download_lists[0][i])
    fastq_download = requests.get(total_link, allow_redirects=True)
    fastq_file_name = folders[2] + str(accession) + gz
    # Context managers close each file right after it is written.
    with open(fastq_file_name, 'wb') as fastq_file:
        fastq_file.write(fastq_download.content)

    xml_location = 'https://www.ebi.ac.uk/ena/data/view/{}&display=xml'.format(accession)
    xml_download = requests.get(xml_location, allow_redirects=True)
    xml_file_name = folders[3] + str(accession) + xml
    with open(xml_file_name, 'wb') as xml_file:
        xml_file.write(xml_download.content)

    # One line is appended to the log file per downloaded sample.
    with open(folders[1], "a") as f:
        f.write("Download do fastq {} e do xml da amostra {} feita com sucesso \n".format(fastq_file_name,xml_file_name))
def preform_pca_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic connectivity of brain areas with performing
    a PCA returning its matrix.

    For every subject and time point the phase-coherence matrix (cosine of
    the wrapped phase difference between every pair of areas) is saved
    under ``<output_path>/dFC``, L2-normalised and reduced with a
    2-component PCA. The PCA summary is written as JSON next to it and the
    two components are stored, concatenated, in the returned matrix, which
    is also saved as ``components_matrix.npz``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: PCA matrix, PCA matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    # The first file fixes the number of time points for all subjects.
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    dFC = np.zeros((brain_areas, brain_areas), dtype=np.float64)
    pca_components = np.zeros((n_subjects, t_phases, brain_areas * 2),
                              dtype=np.float64)

    # The dFC directory does not depend on subject or time point, so it is
    # created once instead of once per time step.
    dfc_output = os.path.join(output_path, 'dFC')
    create_dir(dfc_output)

    for n in tqdm(range(n_subjects)):
        phases = convert_to_phases(paths[n], output_path, brain_areas,
                                   t_phases, n)
        for t in range(t_phases):
            for i in range(brain_areas):
                for z in range(brain_areas):
                    diff = np.absolute(phases[i, t] - phases[z, t])
                    # Differences above pi are wrapped before the cosine.
                    if diff > np.pi:
                        dFC[i, z] = np.cos(2 * np.pi - diff)
                    else:
                        dFC[i, z] = np.cos(diff)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)

            pca = PCA(n_components=2)
            # normalize (returns a new array; every entry is overwritten
            # again at the next time step)
            dFC = preprocessing.normalize(dFC, norm='l2')
            pca.fit(dFC)
            pca_dict = {
                'components': pca.components_.tolist(),
                'explained variance': pca.explained_variance_.tolist(),
                'explained mean variance':
                    np.mean(pca.explained_variance_.tolist()),
                'explained variance ratio':
                    pca.explained_variance_ratio_.tolist(),
                'mean': pca.mean_.tolist(),
                'n components': pca.n_components_,
                'noise variance': pca.noise_variance_.tolist()
            }
            with open(
                    os.path.join(output_path,
                                 'PCA_results_{}_{}'.format(n, t)),
                    'w') as output:
                json.dump(pca_dict, output)
            # List concatenation: first component followed by the second,
            # giving 2 * brain_areas values.
            pca_components[n, t, :] = \
                pca_dict['components'][0] + pca_dict['components'][1]

    # save the PCA matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), pca_components)
    return pca_components, pca_components.shape
train_xx, train_yy, val_x, val_y, test_x, test_y, S_30 = am_util.load_data(args.data_dir + args.data_name + '/') zero_row = np.where(~train_xx.any(axis=1))[0] train_x = np.delete(train_xx, (zero_row), axis=0) train_y = np.delete(train_yy,(zero_row ), axis=0) [train_num, feature_dim] = train_x.shape [train_num, output_dim] = train_y.shape batch_num = int(train_num / args.batch_size) ## 3. fix seed for graph and numpy operations tf.reset_default_graph() tf.set_random_seed(args.seed) ## 4.create output folders to save the log files and trained model #main_output_dir = ./output/data_name/ am_util.create_dir(os.path.join(args.output_dir + args.data_name)) # create forlder for output main_output_dir = args.output_dir + args.data_name ##./output/data_name/model_name/ modelname = args.model_name +'_epsilon_' + str(args.epsilon) + '_reg_param1_' + str(args.reg_param1) + '_reg_param2_' + str(args.reg_param2) am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname)) ## to save predition result and log files output_dir = main_output_dir + '/modelname/' ##./output/data_name/model_name/models output_model_dir = args.output_dir + args.data_name + '/'+ modelname + "/models" ## forlder to save the model under ourput folder am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname, "models")) ##./output/data_name/model_name/logs output_log_dir = args.output_dir + args.data_name + '/'+ modelname + "/logs" ## forlder to save the model under ourput folder am_util.create_dir(os.path.join(args.output_dir, args.data_name, modelname, "logs")) ## 5.define the full graph class NNModel(object):
def run_spatial(options):
    """
    Simulate runs for the SPATIAL algorithm.

    Reads the school, initialization type, number of runs and algorithm
    name from ``options``. Each run builds a population of trial solutions
    and improves it for a fixed number of iterations with the two search
    operators. When the best solution passes ``check_solution`` the run is
    written as JSON into the directory returned by
    ``create_dir('results', algo, school)``. An exception raised inside a
    run is printed and the next run is started.
    """
    school = options.school
    init_kind = options.initialization
    n_runs = options.runs
    algo_name = options.algo
    print('\n... Starting {} algorithm ...\n'.format(algo_name))

    # SPATIAL hyper-parameters
    population = 10
    max_iterations = 1000

    # Read data files
    args = GetInputs(school).get_inputs()

    # Values for the data set
    # Set weight in the range [0, 1] for calculating F = w * F1 + (1 - w) * F2
    # NOTE(review): 7 lies outside the stated [0, 1] range; confirm how
    # set_params interprets it.
    weight = 7
    set_params(weight, max_iterations)

    # init_type = {1: 'seeded', 2: 'infeasible', 3: 'existing'}
    existing = gen_solutions(args, 3)
    print(' Present school boundary configuration has functional value : {:.3f}'.format(existing['func_val']))

    for run_no in range(1, n_runs + 1):
        print('\n Run {} of {}\n'.format(run_no, algo_name))
        try:
            # Generate random seeds and use them for instantiating initial
            # trial solutions
            seeds = [offset + randrange(1000000) for offset in range(population)]
            solutions = copy.deepcopy(
                gen_solutions(args, init_kind, population, seeds))

            # Find the best solution and its functional value
            best_func_val, best_sol, stagnation = find_best_sol(
                solutions, math.inf, 0)
            print('Iter: 0 \t Best func_val: {:.3f}'.format(best_func_val))

            iteration = 0
            t_start = time.time()
            fval_iter = [(0, best_func_val)]  # List to save best solutions
            time_iter = [(0, time.time() - t_start)]

            # Iteratively improve the solutions using the two search operators
            for iteration in tqdm(range(1, max_iterations + 1)):
                run_module(args, 0, solutions)  # Local improvement
                run_module(args, 1, solutions)  # Spatially-aware recombination
                best_func_val, best_sol, stagnation = find_best_sol(
                    solutions, best_func_val, stagnation)

                # Print the best result
                if iteration % 20 == 0:
                    print('Iter: {} \t Best func_val: {:.3f}'.format(iteration, best_func_val))
                fval_iter.append((iteration, best_func_val))
                time_iter.append((iteration, time.time() - t_start))

            # Printing the results
            t_elapsed = (time.time() - t_start) / 60.0  # measures in minutes
            best_solution = solutions[best_sol]
            print("Run: {} took {:.2f} min to execute {} iterations...\n"
                  " Obtained FuncVal: {:.3f} ".format(run_no, t_elapsed, iteration, best_func_val))

            # Save the results
            print(' Checking the correctness of the solution...')
            if not check_solution(args, best_solution['zones']):
                print('\n Incorrect solution. Has disconnected zones... \n')
                continue

            print('\n Correct solution.. Saving results .... ')
            w1, w2, epsilon, _ = parameters()
            alg_params = get_params(w1=w1, w2=w2, epsilon=epsilon,
                                    pop_size=population)
            params = get_params(AlgParams=alg_params,
                                Iteration=iteration,
                                TimeElapsed=t_elapsed,
                                School=school)
            solu_info = {'Existing': existing,
                         'Final': best_solution,
                         'fval_vs_iter': fval_iter}
            run_results = {'properties': params, 'info': solu_info}

            write_path = create_dir('results', algo_name, school)
            result_file = join(
                write_path,
                "run{}_{}_{}.json".format(run_no, algo_name, school))
            with open(result_file, 'w') as outfile:
                json.dump(run_results, outfile)
        except Exception as e:
            print('Run {} incomplete due to error: {}'.format(run_no, e))
def main():
    """
    FC states features.

    Computes the variance, probability, lifetime and entropy outputs of
    the clustered states for the whole data set. With the ``separate``
    option the same outputs are produced for every separated array, and
    every pair of them is compared cluster by cluster with a t-test; the
    collected p-values are written to ``p_values_<n_clusters>.csv``.
    """

    def int_keys(mapping):
        # Re-key a mapping with integer keys.
        return {int(key): value for key, value in mapping.items()}

    def condition_name(array_path):
        # A condition is named after the directory that holds its array.
        return os.path.basename(os.path.dirname(array_path))

    args = parse_args()
    input_path = args.input
    separate = args.separate
    output_path = args.output
    n_clusters = args.n_clusters
    starts_json = args.starts
    clusters_path = args.clusters

    create_dir(output_path)
    # Every column except the last one.
    reduced_components = np.load(input_path)['arr_0'][:, :-1]
    variance = variance_of_states(reduced_components, output_path)
    labels = np.load(clusters_path)['arr_0']
    plot_variance(labels, variance, output_path)
    probabilities, lifets = distribution_probability_lifetime(
        labels, output_path, n_clusters)
    entropy_of_states(probabilities, output_path, n_clusters)

    if not separate:
        return

    probas_p_values = []
    lifetimes_p_values = []
    new_paths = separate_concat_array(input_path, starts_json, output_path,
                                      n_clusters)

    # Per-condition features.
    for path in tqdm(new_paths):
        output_p = os.path.join(output_path, condition_name(path))
        create_dir(output_p)
        matrix = np.load(path)['arr_0']
        # The last column is used as the cluster label of each row.
        task_labels = matrix[:, -1]
        reduced_task = matrix[:, :-1]
        probas, lifetimes = distribution_probability_lifetime(
            task_labels, output_p, n_clusters)
        task_var = variance_of_states(reduced_task, output_p)
        plot_variance(task_labels, task_var, output_p)
        entropy_of_states(probas, output_p, n_clusters)

    # Pairwise comparison of conditions.
    for path_a, path_b in itertools.combinations(new_paths, 2):
        group_a = np.load(path_a)['arr_0'][:, -1]
        group_b = np.load(path_b)['arr_0'][:, -1]
        a_name = condition_name(path_a)
        b_name = condition_name(path_b)
        output = os.path.join(output_path, a_name + '_' + b_name)
        create_dir(output)

        proba_a = int_keys(probability_of_state(group_a, n_clusters, output))
        proba_b = int_keys(probability_of_state(group_b, n_clusters, output))
        lt_a = int_keys(mean_lifetime_of_state(group_a, n_clusters, output))
        lt_b = int_keys(mean_lifetime_of_state(group_b, n_clusters, output))

        # One row per sample: the probability and lifetime of its state.
        dict_prob = {
            'probability': ([proba_a[state] for state in group_a]
                            + [proba_b[state] for state in group_b]),
            'lifetime': ([lt_a[state] for state in group_a]
                         + [lt_b[state] for state in group_b]),
            'condition': ([a_name] * len(group_a)
                          + [b_name] * len(group_b)),
            'cluster': group_a.tolist() + group_b.tolist()
        }
        df = pd.DataFrame(data=dict_prob)
        df.to_csv(os.path.join(output, 'probas_lt_dataframe.csv'))
        plot_probabilities_barplots(df, output)
        plot_lifetimes_barplots(df, output)

        for c in tqdm(range(n_clusters)):
            df_n = df[df['cluster'] == c]
            con_a = df_n[df_n['condition'] == a_name]
            con_b = df_n[df_n['condition'] == b_name]
            _, p_prob = students_t_test(
                con_a['probability'], con_b['probability'],
                os.path.join(output, str(c), 'probability'))
            _, p_lt = students_t_test(
                con_a['lifetime'], con_b['lifetime'],
                os.path.join(output, str(c), 'lifetime'))
            probas_p_values.append(p_prob)
            lifetimes_p_values.append(p_lt)

    p_values = pd.DataFrame({
        'probabilities_p': probas_p_values,
        'lifetimes_p': lifetimes_p_values
    })
    p_values.to_csv(
        os.path.join(output_path, 'p_values_{}.csv'.format(n_clusters)))
def __init__(self, name):
    """
    Set up the instance for ``name``.

    ``userPath`` holds whatever ``util.create_dir(name)`` returns and
    ``index`` starts at 1.
    """
    self.index = 1
    self.userPath = util.create_dir(name)
""" Project: Initial setup for Data Processing methods. Author: Goel, Ayush Date: 1st August 2019 """ from os import listdir from utilities import read_json_file, create_dir, LOGGER, MIN_DIM from utilities.config import PROCESSED_DATA_DIR, BBOX_IMAGES_PATH, RAW_TRAIN_IMAGES_PATH, IMG_INFO_JSON_PATH, \ NORMALISED_IMAGES_PATH, NORMALISED_BBOX_IMAGES_PATH IMG_INFO_JSON = read_json_file(IMG_INFO_JSON_PATH) ANNOTATIONS = IMG_INFO_JSON['annotations'] IMAGES = IMG_INFO_JSON['images'] IMAGES_IN_DIR = listdir(RAW_TRAIN_IMAGES_PATH) create_dir(PROCESSED_DATA_DIR)
from utilities import Configs from utilities import create_dir import data_precessor from openpyxl import load_workbook from urllib.request import urljoin from os import path, sep # add local modules into PATH dir_path = path.dirname(path.realpath(__file__)) tmp_data_dir = dir_path + sep + '../output_excel_files' create_dir(tmp_data_dir) # def write_in_single_file(urls): # tables = [] # for filename, url in urls.items(): # print("Extracting tables from {}".format(filename)) # soup = load_page(url) # tables += soup.findAll('table') # # # if tables: # # data_precessor.write_tables_to_excel(tables, 'output', 'www.unknownwebsite.com') def write_in_multiple_files(urls): files = [] for filename, url in urls.items(): print("Extracting tables from {}".format(filename)) try:
def run(self):
    """
    Run every test of the run list and record the results.

    Parses the run list, executes each entry through ``run_teuthology``
    with its own numbered log location, optionally reports the status of
    the associated testcase ids to testlink, and stores 'Pass'/'Fail' per
    test in ``self.total_result``. Afterwards the results are written to
    ``result.log`` and printed, a mail is sent when ``self.mail`` is set,
    and the nodes are powered off unless ``self.no_poweroff`` is set.

    Exits the interpreter through ``sys.exit()`` when the run list is
    empty.
    """
    self.parse_runlist()
    self.total_result = {}
    log_path = self.log_path + CURRENT_DATE
    self.result = log_path + '/' + 'result.log'
    if not utilities.is_path(log_path):
        utilities.create_dir(log_path)

    no_of_testcases = len(self.runlist_use)
    if no_of_testcases == 0:
        print("No testcases to run")
        sys.exit()

    #Execution started, start time
    start_time = time.time()
    for count, each_test in enumerate(self.runlist_use, 1):
        if self.set_number:
            log_no = 'Log' + str(count) + "_%s" % self.set_number
        else:
            log_no = 'Log' + str(count)
        log_suffix = log_path + '/' + log_no

        # A tuple entry is (display name, test); a plain entry is reported
        # under the test itself. The name is resolved per iteration so a
        # name from an earlier tuple entry can never leak into the result
        # of a later plain entry.
        if isinstance(each_test, tuple):
            display_name, each_test = each_test[0], each_test[1]
        else:
            display_name = each_test

        status = self.run_teuthology(each_test, log_suffix)

        if self.tls_obj:
            testcase_IDList = self.get_testcaseID(log_suffix)
            if testcase_IDList:
                for tcid in testcase_IDList:
                    # Reporting is best effort: a failure for one id is
                    # printed and the remaining ids are still reported.
                    try:
                        self.tls_obj.report_result(tcid, status,
                                                   self.ceph_version)
                    except Exception:
                        print ("Error while reporting Testcases: "
                               "%s to testlink" % tcid)

        result_key = "%s. %s" % (count, display_name)
        self.total_result[result_key] = 'Pass' if status else 'Fail'

    #Exection completed, stop time
    stop_time = time.time()
    self.execution_time = int(stop_time - start_time)

    with open(self.result, 'w') as result_handle:
        result_handle.write(pprint.pformat(self.total_result))
    print(pprint.pformat(self.total_result))

    #send mail
    if self.mail:
        self.sendmail()
    #stop all the nodes
    if not self.no_poweroff:
        self.poweroff_nodes()
    return
from utilities import create_dir
from utilities import download_master_file
from utilities import read_master_file
import pandas as pd
import requests

http = 'http://'
gz = '.fastq.gz'
xml = '.xml'

# As used below: folders[0] is the master file path and folders[2] the
# fastq output prefix.
folders = create_dir('PRJNA422434.txt', 'T2D')
download_master_file(
    'https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA422434&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt',
    folders[0])
download_lists = read_master_file(folders[0], '\t')

#download fastq and metadata
for i in range(len(download_lists[1])):
    # Each entry holds ';'-separated links; the first two are downloaded
    # as the _1 and _2 files of the sample.
    fastq_ftp_str = download_lists[0][i]
    fastq_ftp_list = fastq_ftp_str.split(';')
    total_link_1 = http + str(fastq_ftp_list[0])
    total_link_2 = http + str(fastq_ftp_list[1])

    fastq_1_download = requests.get(total_link_1, allow_redirects=True)
    fastq_file_name_1 = folders[2] + str(download_lists[1][i]) + '_1' + gz
    # Context managers close every handle even when a write fails.
    with open(fastq_file_name_1, 'wb') as fastq_file_1:
        fastq_file_1.write(fastq_1_download.content)

    fastq_2_download = requests.get(total_link_2, allow_redirects=True)
    fastq_file_name_2 = folders[2] + str(download_lists[1][i]) + '_2' + gz
    with open(fastq_file_name_2, 'wb') as fastq_file_2:
        fastq_file_2.write(fastq_2_download.content)
from utilities import create_dir from utilities import download_master_file from utilities import read_master_file import pandas as pd import requests http = 'http://' gz = '.fastq.gz' xml = '.xml' folders = create_dir('PRJNA389280.txt', 'IBD') download_master_file( 'https://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=PRJNA389280&result=read_run&fields=sample_accession,secondary_sample_accession,tax_id,scientific_name,fastq_ftp&download=txt', folders[0]) download_lists = read_master_file(folders[0], '\t') #download fastq and metadata for i in range(len(download_lists[1])): fastq_ftp_str = '' fastq_ftp_list = [] fastq_ftp_str = download_lists[0][i] fastq_ftp_list = fastq_ftp_str.split(';') total_link = http + str(fastq_ftp_list[0]) fastq_download = requests.get(total_link, allow_redirects=True) fastq_file_name = folders[2] + str(download_lists[1][i]) + gz open(fastq_file_name, 'wb').write(fastq_download.content) xml_location = 'https://www.ebi.ac.uk/ena/data/view/{}&display=xml'.format( download_lists[2][i]) xml_download = requests.get(xml_location, allow_redirects=True)