def main(args, config): wDir = os.getcwd() #Instance Preprocessing class window = Preprocessing(args.fasta_file, config['win_length'], config['win_step']) window.output_window() print >> sys.stderr, "Creating windows_sequence.fasta" #Instance Similarity and Composition class sim = Similarity(args.fasta_file, config['score_adj'],wDir) sim_matrix = sim.mcl_perform() comp_results = Composition(config['kmer_len']) comp_matrix = comp_results.joined() #Join similarity and composition matrix for PCA join = pd.concat([comp_matrix, sim_matrix], axis= 1, join='inner') print >> sys.stderr, "Calculating similarity and composition matrix" #Instance Reduction class pca = Reduction(join, config['pca_comp']) pca_data = pca.perform_pca() print >> sys.stderr, "Performing PCA" #Instance Clustering class cluster = Clustering(pca_data) clust_obj = cluster.plot() print >> sys.stderr, "Performing clustering plot" #Instance ClusterReport class report = ClusterReport(clust_obj) file_name, querySeq = report.output_queryseq() print >> sys.stderr, "Doing report of clusters" #Instance Validate class valid = Validate(file_name, args.fasta_file,wDir) jfileComp, jfileMinus = valid.roundTwo() print >> sys.stderr, "Validation of results" #Instance ParseJplace Class parsing = ParseJplace(jfileComp, jfileMinus) corrMat = parsing.correlation() print >> sys.stderr, "Doing profiles" #Instance Profile Class ttest = Profiles(corrMat, querySeq) bestWin = ttest.windowsAssigment() print >>sys.stderr, "Doing permutations" #Instance StatsBinom finalResult = StatsBinom(args.fasta_file, config['win_length'],bestWin) finalResult.binomial() cleaning(file_name)
def basicMode(config, fasta_file, profilePath): #create output folders wDir = os.getcwd() folders = ['pplacer', 'testing'] for folder in folders: os.mkdir(os.path.join(wDir, folder)) #Instance Preprocessing class window = Preprocessing(fasta_file, config['win_length'], config['win_step'], "windows_sequence.fasta") window.output_window() reverseSeq = Preprocessing(fasta_file, config['win_length'], config['win_step'], "reverse_windows.fasta") reverseSeq.output_window() print >> sys.stderr, "Creating windows_sequence.fasta" #Instance Similarity and Composition class sim = Similarity(fasta_file, config['score_adj'], wDir) sim_matrix = sim.mcl_perform() comp_results = Composition(config['kmer_len']) comp_matrix = comp_results.joined() #Join similarity and composition matrix for PCA join = pd.concat([comp_matrix, sim_matrix], axis=1, join='inner') print >> sys.stderr, "Calculating similarity and composition matrix" #Instance Reduction class pca = Reduction(join, config['pca_comp']) pca_data = pca.perform_pca() print >> sys.stderr, "Performing PCA" #Instance Clustering class cluster = Clustering(pca_data) clust_obj = cluster.plot() print >> sys.stderr, "Performing clustering plot" #Instance ClusterReport class report = ClusterReport(clust_obj) file_name, querySeq = report.output_queryseq() print >> sys.stderr, "Doing report of clusters" #Instance Validate class valid = Validate(file_name, fasta_file, wDir) jfileComp, jfileMinus = valid.roundTwo() print >> sys.stderr, "Validation of results" #Instance ParseJplace Class parsing = ParseJplace(jfileComp, jfileMinus) corrMat = parsing.correlation() print >> sys.stderr, "Doing profiles" #Instance Profile Class ttest = Profiles(corrMat, querySeq, wDir, profilePath) bestWin = ttest.windowsAssigment() print >> sys.stderr, "Doing permutations" #Instance StatsBinom finalResult = StatsBinom(fasta_file, config['win_length'], bestWin) finalResult.binomial() print >> sys.stderr, "Calculating p-value" cleaning(file_name)