def __init__(self, pos_fasta, neg_fasta, output_path, segmentation_schemes=10, topN=100):
    '''
    Load the positive and negative sequence sets, prepare the
    segmentations and run the motif extraction.

    :param pos_fasta: positive sequences; either an already loaded
        collection (any non-string value is stored as is) or a path ending
        in '.txt' (read with FileUtility.load_list) or '.fasta' (read with
        FileUtility.read_fasta_sequences)
    :param neg_fasta: negative sequences, same accepted forms as pos_fasta
    :param output_path: output directory, created through
        FileUtility.ensure_dir
    :param segmentation_schemes: stored on the instance before
        prepare_segmentations() is called
    :param topN: forwarded to motif_extraction()
    :raises ValueError: if a path is given whose extension is neither
        'txt' nor 'fasta'
    '''

    def load_sequences(source):
        # Anything that is not a path is taken to be the sequences themselves.
        if not isinstance(source, str):
            return source
        extension = source.split('.')[-1]
        if extension == 'txt':
            return FileUtility.load_list(source)
        if extension == 'fasta':
            return FileUtility.read_fasta_sequences(source)
        # Previously this case left the attribute unset and only failed
        # later with an AttributeError that did not mention the file.
        raise ValueError(
            "Unsupported sequence file '" + source +
            "': expected a '.txt' or '.fasta' file")

    self.pos = load_sequences(pos_fasta)
    self.neg = load_sequences(neg_fasta)

    # positives first, then negatives; labels follow the same order
    self.seqs = [seq.lower() for seq in self.pos + self.neg]
    self.labels = [1] * len(self.pos) + [0] * len(self.neg)
    self.segmentation_schemes = segmentation_schemes
    self.load_alpha_distribution()
    self.prepare_segmentations()
    print (output_path)
    FileUtility.ensure_dir(output_path)
    self.output_path = output_path
    self.motif_extraction(topN)
def generate_LR_important_features(self, clf_LR, feature_names, results_file, N=1000):
    '''
    Write the N features with the largest absolute coefficient of a fitted
    model to a tab-separated file.

    The output file is ``results_file`` with '/classifications/' replaced
    by '/feature_selection/classifications/' and the suffix '_LR'.

    :param clf_LR: fitted model; only the first row of ``coef_`` is used
    :param feature_names: sequence mapping a coefficient index to its name
    :param results_file: base path of the results
    :param N: maximum number of features written
    :return: None
    '''
    results_file = results_file.replace(
        '/classifications/', '/feature_selection/classifications/')
    FileUtility.ensure_dir(results_file)
    file_name = results_file + '_LR'

    # Convert the coefficient row once instead of once per written line.
    coefficients = clf_LR.coef_.tolist()[0]
    # NOTE(review): `argsort(..., rev=True)` is a project helper; presumably
    # it returns indices ordered by decreasing value - confirm.
    idxs = argsort(np.abs(coefficients).tolist(), rev=True)[0:N]

    # The context manager closes the file even when a write fails.
    with codecs.open(file_name, 'w') as f:
        f.write('\t'.join(['feature', 'score']) + '\n')
        for idx in idxs:
            f.write('\t'.join(
                [feature_names[idx], str(coefficients[idx])]) + '\n')
def __init__(self, output_path):
    '''
    Remember the output location and make sure its working folders exist.

    :param output_path: base directory under which the
        'biblecom_intermediate' and 'reports' folders are ensured
    '''
    self.output_path = output_path
    # both folders sit directly below the output path
    for subfolder in ('/biblecom_intermediate/', '/reports/'):
        FileUtility.ensure_dir(self.output_path + subfolder)
def download_zipfile(self, url_outpath_rec):
    '''
    Download one zip archive, unpack it into its output directory and hand
    the result to PNGScriptRetrieve for parsing.

    :param url_outpath_rec: record of exactly five items
        ``(url, outpath, iso, code, langname)``
    :return: ``(url, [iso, code with '_' replaced by '-', langname])`` on
        success, ``(url, False)`` when downloading, unpacking or parsing
        fails
    :raises ValueError: if the record does not hold exactly five items
    :raises TypeError: if the record is not iterable
    '''
    # Unpack before the try block: a malformed record used to be caught by
    # the handler below, which then failed with UnboundLocalError because
    # `url` had never been bound.
    url, outpath, iso, code, langname = url_outpath_rec
    try:
        FileUtility.ensure_dir(outpath)
        r = requests.get(url)
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            z.extractall(outpath)
        dashed_code = code.replace('_', '-')
        # Called for its side effects only (crawl disabled, parse enabled).
        PNGScriptRetrieve(
            (url, outpath, '../../' + iso + '_' + dashed_code + '.png.txt'),
            crawl=False, parse=True)
        return url, [iso, dashed_code, langname]
    except Exception:
        # Best effort: report the failure to the caller, but no longer
        # swallow KeyboardInterrupt / SystemExit as the bare except did.
        return url, False
def __init__(self, output_path):
    '''
    Remember the output location, make sure its working folders exist and
    silence the warnings module.

    :param output_path: base directory under which the
        'pngscripture_intermediate' and 'reports' folders are ensured
    '''
    import warnings

    self.output_path = output_path
    for subfolder in ('/pngscripture_intermediate/', '/reports/'):
        FileUtility.ensure_dir(self.output_path + subfolder)

    def _discard_warning(*args, **kwargs):
        pass

    # Swap warnings.warn for a no-op; this replaces the module attribute,
    # so it is not limited to this instance.
    warnings.warn = _discard_warning
def __init__(self, key, output_path):
    '''
    Store the API key and output location, prepare the working folders,
    verify the key against the API and load the book map.

    :param key: API key, sent to https://dbt.io/api/apiversion
    :param output_path: base directory under which the 'api_intermediate'
        and 'reports' folders are ensured
    :raises ValueError: if the version endpoint does not answer with HTTP
        200 (the key is presumably wrong)
    '''
    # set the parameters
    self.key = key
    self.output_path = output_path
    FileUtility.ensure_dir(self.output_path + '/api_intermediate/')
    FileUtility.ensure_dir(self.output_path + '/reports/')
    self.to_double_check = list()

    # check the API connection
    response = requests.get('https://dbt.io/api/apiversion?key=' + self.key + '&v=2')
    if response.status_code != 200:
        print('Enter a correct API code')
        # `return False` is not allowed in __init__: Python turned it into
        # "TypeError: __init__() should return None". Fail with an error
        # that names the actual problem instead.
        raise ValueError(
            'API key check failed with HTTP status ' +
            str(response.status_code))

    response = json.loads(response.content)
    print('Connected successfully to the bible digital platform v ' + response['Version'])
    self.load_book_map()
def generate_RF_important_features(self, clf_random_forest, feature_names, results_file, N=1000):
    '''
    Fit the forest on ``self.X`` / ``self.Y`` and write its N most
    important features to a tab-separated file.

    The output file is ``results_file`` with '/classifications/' replaced
    by '/feature_selection/classifications/' and the suffix '_RF'.

    :param clf_random_forest: forest estimator; it is (re)fitted here and
        must expose ``estimators_`` and ``feature_importances_``
    :param feature_names: sequence mapping a feature index to its name
    :param results_file: base path of the results
    :param N: maximum number of features written
    :return: None
    '''
    results_file = results_file.replace(
        '/classifications/', '/feature_selection/classifications/')
    FileUtility.ensure_dir(results_file)
    file_name = results_file + '_RF'

    clf_random_forest.fit(self.X, self.Y)
    # spread of each feature's importance over the individual trees
    std = np.std([
        tree.feature_importances_
        for tree in clf_random_forest.estimators_
    ], axis=0)

    # feature name -> (importance, std); NaN importances are dropped.
    # Repeated feature names collapse to the last occurrence.
    scores = {
        feature_names[i]: (s, std[i])
        for i, s in enumerate(list(clf_random_forest.feature_importances_))
        if not math.isnan(s)
    }
    # The original key was itemgetter([1][0]), which is just itemgetter(1):
    # order by the (importance, std) pair, i.e. by importance with std as
    # tie-breaker.
    scores = sorted(scores.items(), key=operator.itemgetter(1),
                    reverse=True)[0:N]

    # The context manager closes the file even when a write fails.
    with codecs.open(file_name, 'w') as f:
        f.write('\t'.join(['feature', 'score']) + '\n')
        for w, score in scores:
            f.write('\t'.join([str(w), str(score[0])]) + '\n')
def generate_tree(self, path, name):
    '''
    Write the GraPhlAn taxonomy and annotation files for the markers and
    invoke the GraPhlAn scripts to render ``path + name + '.pdf'``.

    The rows come from ``self.get_pandas_df()`` (columns 'taxonomy',
    'direction', 'taxonomylevel', 'pvalue'). Clades deeper than five
    levels are coloured by the majority direction of their markers:
    'r' when more than 80% of the '+'/'-' votes are '+', 'b' when more
    than 80% are '-', otherwise 'w'.

    :param path: output directory; intermediate files go to
        ``path + '/graphlan_files/'``
    :param name: file name stem of all generated files
    :return: None
    '''
    path_g = path + '/graphlan_files/'
    FileUtility.ensure_dir(path_g)
    # annotation font size per value of the 'taxonomylevel' column
    font_map = {1: 15, 2: 14, 3: 13, 4: 12, 5: 8, 6: 7, 7: 4}

    # Fetch the table once instead of once per column.
    df = self.get_pandas_df()
    direction = df['direction'].tolist()
    taxlev = df['taxonomylevel'].tolist()
    logpval = [round(-np.log(x)) for x in df['pvalue'].tolist()]
    taxonomy = [
        '.'.join(self.refine_ez_taxonomy(x).split(';'))
        for x in df['taxonomy'].tolist()
    ]
    tax_freq = dict(FreqDist(taxonomy).most_common())
    # number of rows sharing the row's taxonomy string
    logpval_frq = [tax_freq[x] for x in taxonomy]

    # Split every lineage once; [-1] is the clade itself and [1] the
    # second level, which is used for the outer ring.
    lineages = [x.split('.') for x in taxonomy]
    rows = range(len(direction))
    deep = [idx for idx in rows if len(lineages[idx]) > 5]

    # collect one colour vote per marker for each deep clade
    votes = dict()
    for idx in deep:
        x = direction[idx]
        coloring = ('r' if x == '+' else ('b' if x == '-' else 'g'))
        votes.setdefault(lineages[idx][-1], []).append(coloring)

    dict_color = dict()
    for tax, colors in votes.items():
        freq = FreqDist(colors)
        signed = freq['r'] + freq['b']
        # `signed` is 0 when no marker of the clade is '+' or '-'; the
        # unguarded division raised ZeroDivisionError in that case. Such
        # clades are left uncoloured.
        if signed and freq['r'] / signed > 0.8:
            dict_color[tax] = 'r'
        elif signed and freq['b'] / signed > 0.8:
            dict_color[tax] = 'b'
        else:
            dict_color[tax] = 'w'

    # deep clades that received a real colour, and rows with a second level
    colored = [idx for idx in deep if not dict_color[lineages[idx][-1]] == 'w']
    ringed = [idx for idx in rows if len(lineages[idx]) > 1]

    def entry(clade, key, value):
        # one line of the GraPhlAn annotation file
        return '\t'.join([clade, key, value])

    annot = [
        entry(lineages[idx][-1], 'annotation_background_color',
              dict_color[lineages[idx][-1]])
        for idx in deep
    ]
    annot += [
        entry(lineages[idx][-1], 'annotation_background_color', 'w')
        for idx in rows if len(lineages[idx]) == 5
    ]
    annot += [
        entry(lineages[idx][-1], 'annotation', lineages[idx][-1])
        for idx in colored
    ]

    ## OUTER RINGS
    annot += [
        entry(lineages[idx][1], 'annotation', lineages[idx][1])
        for idx in ringed
    ]
    annot += [
        entry(lineages[idx][1], 'annotation_rotation', str(1))
        for idx in ringed
    ]
    annot += [
        entry(lineages[idx][1], 'annotation_font_size', str(9))
        for idx in ringed
    ]
    annot += [
        entry(lineages[idx][1], 'annotation_background_color', '#eedbfc')
        for idx in ringed
    ]

    ## Clades
    annot += [
        entry(lineages[idx][-1], 'clade_marker_size', str(logpval_frq[idx]))
        for idx in colored
    ]
    annot += [
        entry(lineages[idx][-1], 'clade_marker_edge_width', str(logpval[idx]))
        for idx in colored
    ]
    annot += [
        entry(lineages[idx][-1], 'annotation_rotation', str(1))
        for idx in colored
    ]
    annot += [
        entry(lineages[idx][-1], 'annotation_font_size',
              str(font_map[taxlev[idx]]))
        for idx in colored
    ]

    # global GraPhlAn settings
    annot += [
        'annotation_background_offset\t0.5',
        'clade_marker_edge_color\t#4f1a49',
        'branch_color\t#4f1a49',
        'annotation_background_separation\t-0.01',
        'annotation_background_width\t0.2',
    ]

    # GraPhlAn usage: https://bitbucket.org/nsegata/graphlan/src/default/readme.txt
    #   python graphlan_annotate.py --annot annot.txt tree.txt out.xml
    #   python graphlan.py out.xml image.pdf --dpi 1000 --size 15 --external_legends
    # only the coloured deep clades are drawn
    taxonomy = [taxonomy[idx] for idx in colored]
    FileUtility.save_list(path_g + name + '_taxonomy.txt', taxonomy)
    FileUtility.save_list(path_g + name + '_annot.txt', annot)

    # NOTE(review): the commands are built by string concatenation and run
    # through the shell, so `path` / `name` containing spaces or shell
    # metacharacters will break (or inject into) the command line. Kept as
    # is because callers may rely on shell expansion of the paths.
    subprocess.call("python3 graphlan/graphlan_annotate.py --annot " +
                    path_g + name + '_annot.txt' + " " + path_g + name +
                    '_taxonomy.txt' + " " + path_g + name + '.xml',
                    shell=True)
    subprocess.call("python3 graphlan/graphlan.py " + path_g + name +
                    '.xml' + " " + path + name +
                    '.pdf --dpi 1000 --size 15 --external_legends',
                    shell=True)
    try:
        FileUtility.remove(path + name + '_legend.pdf')
    except Exception:
        # the legend file is optional; a failed removal is not an error
        print('')
def predict_block(self, ultimate=False):
    '''
    Run the classification experiments described by the <predict> elements
    of ``self.xmldoc``.

    For every <predict> element and every phenotype of the genotype tables
    this prepares the output folders, creates the cross-validation folds
    (tree based when the <eval> text is 'tree', random otherwise), tunes
    the requested classifiers (svm / rf / lr) on every combination of
    three representations and finally generates the top features.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; please confirm the placement of the
    "generate selected features" section.

    :param ultimate: when true, fold creation and classifier tuning are
        skipped
    :return: None
    '''
    import warnings
    from sklearn.exceptions import DataConversionWarning, FitFailedWarning, UndefinedMetricWarning, ConvergenceWarning
    for category in (DataConversionWarning, FitFailedWarning,
                     DeprecationWarning, UndefinedMetricWarning,
                     ConvergenceWarning):
        warnings.filterwarnings(action='ignore', category=category)

    predict_blocks = self.xmldoc.getElementsByTagName('predict')
    predict_path = self.output + '/classifications/'

    # iterate over predict block
    for predict in predict_blocks:
        # Sub prediction
        FileUtility.ensure_dir(predict_path)
        setting_name = predict.attributes['name'].value
        subdir = predict_path + setting_name + '/'
        FileUtility.ensure_dir(subdir)

        ## label mapping: phenotype text -> integer class
        labels = predict.getElementsByTagName('labels')[0].getElementsByTagName('label')
        mapping = dict()
        for label in labels:
            val = label.attributes['value'].value
            phenotype = label.firstChild.nodeValue.strip()
            mapping[phenotype] = int(val)

        ## optimizing for ..
        optimization = predict.getElementsByTagName('optimize')[0].firstChild.nodeValue.strip()

        ## cross-validation basis, number of folds and test ratio
        eval_node = predict.getElementsByTagName('eval')[0]
        self.cvbasis = eval_node.firstChild.nodeValue.strip()
        folds = int(eval_node.attributes['folds'].value)
        test_ratio = float(eval_node.attributes['test'].value)

        if optimization not in ['accuracy', 'scores_r_1', 'scores_f1_1', 'scores_f1_0', 'f1_macro', 'f1_micro']:
            # reported only; the run continues as before
            print ('Error in choosing optimization score')

        # Requested classifiers depend on the <predict> element only. They
        # used to be collected inside the feature-combination loop, which
        # left `classifiers` unbound for generate_top_features() whenever
        # no feature combination existed.
        classifiers = [
            x.nodeName
            for model in predict.getElementsByTagName('model')
            for x in model.childNodes
            if not x.nodeName == "#text"
        ]

        ## Genotype tables
        GPA = GenotypePhenotypeAccess(self.output)

        ## iterate over phenotypes if there exist more than one
        for phenotype in GPA.phenotypes:
            print ('working on phenotype ', phenotype)
            FileUtility.ensure_dir(subdir + phenotype + '/')

            ## create cross-validation
            FileUtility.ensure_dir(subdir + phenotype + '/cv/')
            cv_file = ''
            cv_test_file = ''
            if not ultimate:
                tree_based = self.cvbasis == 'tree'
                cv_dir = subdir + phenotype + '/cv/' + ('tree' if tree_based else 'rand') + '/'
                FileUtility.ensure_dir(cv_dir)
                cv_file = cv_dir + ''.join([phenotype, '_', setting_name, '_folds.txt'])
                cv_test_file = cv_dir + ''.join([phenotype, '_', setting_name, '_test.txt'])
                if self.override or not FileUtility.exists(cv_file):
                    if tree_based:
                        GPA.create_treefold(cv_file, self.metadata_path + 'phylogentictree.txt', folds, test_ratio, phenotype, mapping)
                    else:
                        GPA.create_randfold(cv_file, folds, test_ratio, phenotype, mapping)

            features = [
                x.split('/')[-1].replace('_feature_vect.npz', '')
                for x in FileUtility.recursive_glob(self.representation_path, '*.npz')
            ]
            ## TODO: ask as an input
            max_length_feature_comb = 3  # len(features)
            feature_combinations = []
            for r in range(3, max_length_feature_comb + 1):
                feature_combinations += [list(x) for x in itertools.combinations(features, r)]

            ## iterate over feature sets
            for feature_setting in feature_combinations:
                if not ultimate:
                    X, Y, feature_names, final_strains = GPA.get_xy_prediction_mats(feature_setting, phenotype, mapping)
                    # drop the last dot-separated part of every feature name
                    feature_setting = [
                        ''.join(feature.split('.')[0:-1]) if len(feature.split('.')) > 1 else feature
                        for feature in feature_setting
                    ]
                    feature_text = '##'.join(feature_setting)
                    basepath_cls = subdir + phenotype + '/' + feature_text + '_CV_' + self.cvbasis

                    ## iterate over classifiers; a model is only tuned when
                    ## its pickle is missing or override is set
                    for classifier in tqdm.tqdm(classifiers):
                        if classifier.lower() == 'svm' and (not FileUtility.exists(basepath_cls + '_SVM.pickle') or self.override):
                            Model = SVM(X, Y)
                            Model.tune_and_eval_predefined(basepath_cls, final_strains, folds_file=cv_file, test_file=cv_test_file, njobs=self.cores, feature_names=feature_names, params=[{'C': [1000, 500, 200, 100, 50, 20, 10, 5, 2, 1, 0.2, 0.5, 0.01, 0.02, 0.05, 0.001]}])
                        if classifier.lower() == 'rf' and (not FileUtility.exists(basepath_cls + '_RF.pickle') or self.override):
                            Model = RFClassifier(X, Y)
                            Model.tune_and_eval_predefined(basepath_cls, final_strains, folds_file=cv_file, test_file=cv_test_file, njobs=self.cores, feature_names=feature_names)
                        if classifier.lower() == 'lr' and (not FileUtility.exists(basepath_cls + '_LR.pickle') or self.override):
                            Model = LogRegression(X, Y)
                            Model.tune_and_eval_predefined(basepath_cls, final_strains, folds_file=cv_file, test_file=cv_test_file, njobs=self.cores, feature_names=feature_names)

            # generate selected features
            FileUtility.ensure_dir(self.output + '/' + 'ultimate_outputs/')
            print ('Select the top markers..')
            generate_top_features(self.output, [x.upper() for x in classifiers], topk=200)
            FileUtility.ensure_dir(subdir + phenotype + '/' + 'final_results/')

    FileUtility.ensure_dir(self.output + '/' + 'ultimate_outputs/')
def training_loop(**kwargs):
    '''
    Build the requested model, train it with the batch generators and save
    the configuration, checkpoints and training history below 'results/'.

    Expected keyword arguments:

    :param run_parameters: dict with the keys 'gpu', 'train_batch_size',
        'test_batch_size', 'patience', 'epochs', 'domain_name' and
        'setting_name'
    :param model_paramters: dict of keyword arguments for the model builder
    :param deep_learning_model: string that evaluates to the model builder;
        the builder is called as ``builder(n_classes, **model_paramters)``
        and must return ``(model, params)`` where ``params`` is a string
        used as directory name
    :return: None
    '''
    run_parameters = kwargs['run_parameters']
    model_paramters = kwargs['model_paramters']
    # NOTE(review): eval() executes whatever expression the caller passes;
    # never feed it untrusted input. A name -> builder lookup table would
    # be safer.
    model = eval(kwargs['deep_learning_model'])

    # which GPU to use
    os.environ["CUDA_VISIBLE_DEVICES"] = str(run_parameters['gpu'])

    # read files
    train_file = 'datasets/train.txt'
    test_file = 'datasets/test.txt'
    LD = LabelingData(train_file, test_file)
    train_lengths = [int(j) for j in FileUtility.load_list('/'.join(train_file.split('/')[0:-1]) + '/train_length.txt')]
    test_lengths = [int(i) for i in FileUtility.load_list('/'.join(test_file.split('/')[0:-1]) + '/test_length.txt')]

    # train/test batch parameters
    train_batch_size = run_parameters['train_batch_size']
    test_batch_size = run_parameters['test_batch_size']
    patience = run_parameters['patience']
    epochs = run_parameters['epochs']

    # model
    model, params = model(LD.n_classes, **model_paramters)

    # output directory, ensured one level at a time
    full_path = 'results/'
    FileUtility.ensure_dir(full_path)
    for level in (run_parameters['domain_name'], run_parameters['setting_name'], params):
        full_path = full_path + level + '/'
        FileUtility.ensure_dir(full_path)

    # save model
    with open(full_path + 'config.txt', 'w') as fh:
        model.summary(print_fn=lambda x: fh.write(x + '\n'))

    # check points
    filepath = full_path + "/weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max', period=1)
    earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max', baseline=None)
    callbacks_list = [checkpoint, earlystopping]

    # Number of batches per pass, rounded up. The training count used to
    # be a float (true division) whenever the data size was a multiple of
    # the batch size, unlike the validation count; both are integers now.
    steps_per_epoch = -(-len(train_lengths) // train_batch_size)
    validation_steps = -(-len(test_lengths) // test_batch_size)

    # feed model
    h = model.fit_generator(train_batch_generator_408(train_batch_size),
                            steps_per_epoch=steps_per_epoch,
                            validation_data=validation_batch_generator_408(test_batch_size),
                            validation_steps=validation_steps,
                            shuffle=False, epochs=epochs, verbose=1,
                            callbacks=callbacks_list)

    # Analysis of the performance; the returned metrics are not used
    # further in this function.
    pred_test = [(model.predict_on_batch(x), y, w) for x, y, w in tqdm.tqdm(validation_batches_fortest_408(1))]
    acc_test, conf_mat, conf_mat_column_mapping, contingency_metric, chi2_res_pval, gtest_res_pval = generate_report(pred_test)

    # save the history
    FileUtility.save_obj(full_path + 'history', h.history)
def biomarker_extraction(self, labeler, label_mapper, phenoname, p_value_threshold=0.05, pos_label=None, neg_label=None, excel=0):
    '''
    Detect the NPE markers for one phenotype, analyse them and create the
    final outputs (tree, optional excel file, t-SNE plot, heatmap).

    Both expensive stages are skipped when their result file already
    exists and ``self.override`` is not 1.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; please confirm the extent of the ``excel == 1``
    branch.

    :param labeler: maps a sample file name to its label; either a
        callable or an object indexed by file name
    :param label_mapper: mapping from label to class value; samples whose
        label is not a key are ignored
    :param phenoname: phenotype name, used in all output file names
    :param p_value_threshold: forwarded to NPEMarkerAnlaysis
    :param pos_label: display name of the positive class (heatmap / t-SNE)
    :param neg_label: display name of the negative class (heatmap / t-SNE)
    :param excel: 1 to additionally create the marker excel file
    :return: None
    '''
    print('\t✔ NPE Marker detection is started..')
    start = time.time()

    rep_base_path = self.output_directory_inter + 'npe_representation/' + self.dbname + '_uniquepiece_' + str(
        self.rep_sampling_depth)
    matrix_path = rep_base_path + '.npz'
    feature_file_path = rep_base_path + '_features'
    y_file = rep_base_path + '_' + phenoname + '_Y.txt'
    marker_dir = self.output_directory_inter + 'npe_marker_files/'
    fasta_file = marker_dir + phenoname + '_chi2_relative.fasta'
    final_dir = self.output_directory + 'final_outputs/'
    state_path = final_dir + 'save_states/' + phenoname

    filenames = [
        x.split('/')[-1]
        for x in FileUtility.load_list(rep_base_path + '_meta')
    ]

    # one accessor for both kinds of labeler
    if callable(labeler):
        label_of = labeler
    else:
        def label_of(file):
            return labeler[file]

    # CHECK EXISTING LABELS: keep the samples whose label is mapped
    selected_samples = [
        idx for idx, file in enumerate(filenames)
        if label_of(file) in label_mapper
    ]
    mapped_labels = [
        str(label_mapper[label_of(filenames[sample_id])])
        for sample_id in selected_samples
    ]
    FileUtility.save_list(y_file, mapped_labels)

    DiTaxaWorkflow.ensure_dir(marker_dir)
    if self.override == 1 or not DiTaxaWorkflow.exists(fasta_file):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            G16s = NPEMarkerDetection(matrix_path, y_file,
                                      feature_file_path,
                                      marker_dir + phenoname,
                                      selected_samples)
            G16s.extract_markers()
        end = time.time()
        spent = end - start
        print('\t✔ biomarker extraction ' + phenoname + ' ' + str(spent) +
              ' seconds , using ' + str(self.num_p) + ' cores')
        self.log_file.append('biomarker extraction ' + phenoname + ' ' +
                             str(spent) + ' seconds , using ' +
                             str(self.num_p) + ' cores')
    else:
        print(
            '\t✔ Biomarker are already extracted. Thus, the statistical test was bypassed'
        )
        self.log_file.append(
            ' Biomarker are already extracted. Thus, the statistical test was bypassed'
        )
    FileUtility.save_list(self.output_directory + 'logfile.txt',
                          self.log_file)

    print('\t✔ Taxonomic assignment of the markers..')
    phenotypes = [
        label_of(filenames[sample_id]) for sample_id in selected_samples
    ]
    # redundancy removal is switched off above 2000 marker sequences
    remove_redundants = len(FileUtility.read_fasta_sequences(fasta_file)) <= 2000

    FileUtility.ensure_dir(final_dir + 'save_states/')
    if self.override == 1 or not DiTaxaWorkflow.exists(state_path + '.pickle'):
        start = time.time()
        Final_OBJ = NPEMarkerAnlaysis(fasta_file,
                                      matrix_path,
                                      feature_file_path,
                                      phenotypes,
                                      label_mapper,
                                      selected_samples,
                                      p_value_threshold=p_value_threshold,
                                      remove_redundants=remove_redundants,
                                      num_p=self.num_p,
                                      blastn_path=self.blastn_path)
        end = time.time()
        spent = end - start
        DiTaxaWorkflow.ensure_dir(final_dir)
        FileUtility.save_obj(state_path, Final_OBJ)
        print('\t✔ Marker analysis and alignment ' + phenoname + ' ' +
              str(spent) + ' seconds, using ' + str(self.num_p) + 'cores')
        self.log_file.append('Marker analysis and alignment ' + phenoname +
                             ' ' + str(spent) + ' seconds, using ' +
                             str(self.num_p) + 'cores')
    else:
        Final_OBJ = FileUtility.load_obj(state_path + '.pickle')
        print('\t✔ The aligned markers already existed and are loaded!')
        self.log_file.append(
            'The aligned markers already existed and are loaded!')
    FileUtility.save_list(self.output_directory + 'logfile.txt',
                          self.log_file)

    # generating the tree
    Final_OBJ.generate_tree(final_dir, phenoname)

    # Inputs of the t-SNE plot. They are defined unconditionally: the
    # heatmap branch below also plots when excel != 1, and as far as the
    # source shows they used to be bound only in the excel branch (with
    # `Y` still holding the label list instead of the label file path).
    tsne_file = final_dir + phenoname + '_tsne.pdf'
    markers = marker_dir + phenoname + '_finalmarker_list.txt'

    if excel == 1:
        print('\t✔ Creating marker excel file..')
        Final_OBJ.generate_excel(final_dir + phenoname + '.xlsx', phenoname)
        print('\t✔ Creating t-sne plot..')
        DiTaxaWorkflow.plot_res(tsne_file,
                                matrix_path,
                                feature_file_path,
                                markers,
                                y_file,
                                labels=['Negative', 'Positive'])

    if pos_label and neg_label:
        print('\t✔ Creating marker heatmap..')
        Final_OBJ.update_matrix_by_markers_N()
        Final_OBJ.generate_heatmap(final_dir + phenoname + '_heatmap',
                                   pos_label=pos_label,
                                   neg_label=neg_label)
        if not excel == 1:
            print('\t✔ Creating t-sne plot..')
            DiTaxaWorkflow.plot_res(tsne_file,
                                    matrix_path,
                                    feature_file_path,
                                    markers,
                                    y_file,
                                    labels=[neg_label, pos_label])

    DiTaxaWorkflow.temp_cleanup()
    print(
        '\t⬛ Marker detection and analysis completed. You can find the results at '
        + self.output_directory +
        ', in partuclar at final_outputs subdirectory.')