def main(prj_dh,test=False): """ **--step 4**. Compares test (eg. treated) and control (eg. untreated) experiments. The output data is saved in `data_comparison` format as described in :ref:`io`. :param prj_dh: path to project directory. """ logging.info("start") if not exists(prj_dh) : logging.error("Could not find '%s'" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info cores=info.cores # SET global variables global prj_dh_global prj_dh_global=prj_dh if exists('%s/cfg/comparison' % prj_dh): comparison_pairs_list =getusable_comparison_list(prj_dh) if test: pooled_data_fit2data_comparison(comparison_pairs_list[0]) else: pool=Pool(processes=int(cores)) pool.map(pooled_data_fit2data_comparison,comparison_pairs_list) pool.close(); pool.join() else : logging.warning("do not exist: cfg/comparison") data_fit_metrics=get_data_metrics(prj_dh) logging.shutdown()
def main(prj_dh): """ **--step 5**. Generates vizualizations. #. Scatter grid plots raw counts in replicates, if present. #. Mutation matrix. of frequencies of mutants (log scaled). #. Scatter plots of raw counts among selected and unselected samples #. Mutation matrix. of Fitness values. #. DFE plot. ie. Distribution of Fitness values for samples. #. Projections on PDB. Average of fitness values per residue are projected onto PDB file. :param prj_dh: path to project directory. """ logging.info("start") if not exists(prj_dh): logging.error("Could not find '%s'" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info for type_form in ['aas', 'cds']: plots_dh = '%s/plots/%s' % (prj_dh, type_form) if not exists(plots_dh): makedirs(plots_dh) plot_coverage(info) plot_mutmap(info) plot_submap(info) plot_multisca(info) plot_pdb(info) plot_violin(info)
def main(prj_dh, test=False): """ **--step 4**. Compares test (eg. treated) and control (eg. untreated) experiments. The output data is saved in `data_comparison` format as described in :ref:`io`. :param prj_dh: path to project directory. """ logging.info("start") if not exists(prj_dh): logging.error("Could not find '%s'" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info cores = info.cores # SET global variables global prj_dh_global prj_dh_global = prj_dh if exists('%s/cfg/comparison' % prj_dh): comparison_pairs_list = getusable_comparison_list(prj_dh) if test: pooled_data_fit2data_comparison(comparison_pairs_list[0]) else: pool = Pool(processes=int(cores)) pool.map(pooled_data_fit2data_comparison, comparison_pairs_list) pool.close() pool.join() else: logging.warning("do not exist: cfg/comparison") data_fit_metrics = get_data_metrics(prj_dh) logging.shutdown()
def main(prj_dh, test=False): """ **--step 1**. Processes alignment (.sam file) and produces codon level mutation matrix of counts of mutations. :param prj_dh: path to project directory. """ logging.info("start") # SET global variables global fsta_id, fsta_seqlen, fsta_seq, cds_ref, Q_cutoff, prj_dh_global prj_dh_global = prj_dh if not exists(prj_dh): logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info fsta_fh = info.fsta_fh Q_cutoff = int(info.Q_cutoff) cores = int(info.cores) samtools_fh = info.samtools_fh with open(fsta_fh, 'r') as fsta_data: for fsta_record in SeqIO.parse(fsta_data, "fasta"): fsta_id = fsta_record.id fsta_seq = str(fsta_record.seq) fsta_seqlen = len(fsta_seq) logging.info("ref name : '%s', length : '%d' " % (fsta_record.id, fsta_seqlen)) cds_ref = [] for cdi in range(len(fsta_seq) / 3): cds_ref.append(str(fsta_seq[cdi * 3:cdi * 3 + 3])) sbam_fhs = getusablesbams_list(prj_dh) # check if bams are indexed for sbam_fh in sbam_fhs: sbam_index_fh = "%s.bai" % sbam_fh log_fh = "%s.log" % sbam_index_fh log_f = open(log_fh, 'a') if not exists(sbam_index_fh): com = "%s index %s" % (samtools_fh, sbam_fh) subprocess.call(com, shell=True, stdout=log_f, stderr=subprocess.STDOUT) log_f.close() if len(sbam_fhs) != 0: if test: pooled(sbam_fhs[0]) else: pool = Pool(processes=int(cores)) # T : get it from xls pool.map(pooled, sbam_fhs) pool.close() pool.join() else: logging.info("already processed") logging.shutdown()
def main(prj_dh,test=False): """ **--step 1**. Processes alignment (.sam file) and produces codon level mutation matrix of counts of mutations. :param prj_dh: path to project directory. """ logging.info("start") # SET global variables global fsta_id,fsta_seqlen,fsta_seq,cds_ref,Q_cutoff,prj_dh_global prj_dh_global=prj_dh if not exists(prj_dh) : logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info fsta_fh=info.fsta_fh Q_cutoff=int(info.Q_cutoff) cores=int(info.cores) samtools_fh=info.samtools_fh with open(fsta_fh,'r') as fsta_data: for fsta_record in SeqIO.parse(fsta_data, "fasta") : fsta_id=fsta_record.id fsta_seq=str(fsta_record.seq) fsta_seqlen=len(fsta_seq) logging.info("ref name : '%s', length : '%d' " % (fsta_record.id, fsta_seqlen)) cds_ref=[] for cdi in range(len(fsta_seq)/3) : cds_ref.append(str(fsta_seq[cdi*3:cdi*3+3])) sbam_fhs=getusablesbams_list(prj_dh) # check if bams are indexed for sbam_fh in sbam_fhs: sbam_index_fh="%s.bai" % sbam_fh log_fh="%s.log" % sbam_index_fh log_f = open(log_fh,'a') if not exists(sbam_index_fh): com= "%s index %s" % (samtools_fh,sbam_fh) subprocess.call(com,shell=True,stdout=log_f, stderr=subprocess.STDOUT) log_f.close() if len(sbam_fhs)!=0: if test: pooled(sbam_fhs[0]) else: pool=Pool(processes=int(cores)) # T : get it from xls pool.map(pooled, sbam_fhs) pool.close(); pool.join() else: logging.info("already processed") logging.shutdown()
def main(prj_dh, test=False): """ **--step 0.2**. Preprocesses and aligns sequencing files. The steps and required dependendencies are following. .. code-block:: text Quality filtering : using Trimmomatic. Alignment : using bowtie2 .sam to .bam conversion : using samtools :param prj_dh: path to project directory """ logging.info("start") global trimmomatic_fh, fsta_fh, alignment_type, bt2_ref_fh, bowtie2_fh, samtools_fh, bowtie2_com if not exists(prj_dh): logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info fsta_fh = info.fsta_fh cores = info.cores trimmomatic_fh = info.trimmomatic_fh bowtie2_fh = info.bowtie2_fh trimmomatic_com = info.trimmomatic_com bowtie2_com = info.bowtie2_com samtools_fh = info.samtools_fh alignment_type = info.alignment_type # make bowtie index bt2_ref_fh = splitext(fsta_fh)[0] if not exists("%s.1.bt2" % bt2_ref_fh): bowtie_ref_com="%s-build --quiet %s %s &> %s.logbt2bld" \ % (bowtie2_fh,fsta_fh,splitext(bt2_ref_fh)[0],bt2_ref_fh) subprocess.call(bowtie_ref_com, shell=True) logging.info("bt2_ref_fh do not exist, made one.") fastqs_list = getusablefastqs_list(prj_dh) # print fastqs_list if len(fastqs_list) != 0: if test: for fastq in fastqs_list: pooled(fastq) else: pool = Pool(processes=int(cores)) pool.map(pooled, fastqs_list) pool.close() pool.join() else: logging.info("already processed") # cfg_h5.close() logging.shutdown()
def main(prj_dh,test=False): """ **--step 0.2**. Preprocesses and aligns sequencing files. The steps and required dependendencies are following. .. code-block:: text Quality filtering : using Trimmomatic. Alignment : using bowtie2 .sam to .bam conversion : using samtools :param prj_dh: path to project directory """ logging.info("start") global trimmomatic_fh,fsta_fh,alignment_type,bt2_ref_fh,bowtie2_fh,samtools_fh,bowtie2_com if not exists(prj_dh) : logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info fsta_fh=info.fsta_fh cores=info.cores trimmomatic_fh=info.trimmomatic_fh bowtie2_fh=info.bowtie2_fh trimmomatic_com=info.trimmomatic_com bowtie2_com=info.bowtie2_com samtools_fh=info.samtools_fh alignment_type=info.alignment_type # make bowtie index bt2_ref_fh = splitext(fsta_fh)[0] if not exists("%s.1.bt2" % bt2_ref_fh): bowtie_ref_com="%s-build --quiet %s %s &> %s.logbt2bld" \ % (bowtie2_fh,fsta_fh,splitext(bt2_ref_fh)[0],bt2_ref_fh) subprocess.call(bowtie_ref_com,shell=True) logging.info("bt2_ref_fh do not exist, made one.") fastqs_list=getusablefastqs_list(prj_dh) # print fastqs_list if len(fastqs_list)!=0: if test: for fastq in fastqs_list: pooled(fastq) else: pool=Pool(processes=int(cores)) pool.map(pooled,fastqs_list) pool.close(); pool.join() else: logging.info("already processed") # cfg_h5.close() logging.shutdown()
def main(prj_dh): """ **--step 0.3**. Extracts molecular features of the gene. The out files are created in `prj_dh/data_feats` The steps and required dependendencies are following. .. code-block:: text Secondary structure : using DSSP. Solvent Accessible Surface Area : using DSSP. Distance of a residue from reference atom: using Bio.PDB :param prj_dh: path to project directory. """ logging.basicConfig( format='[%(asctime)s] %(levelname)s from %(funcName)s:\t%(message)s', level=logging.DEBUG) # filename=cfg_xls_fh+'.log' logging.info("start") if not exists(prj_dh): logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info #FEATS PER POSITION data_feats_pos_fh = "%s/data_feats/aas/data_feats_pos" % prj_dh data_feats_pos = get_data_feats_pos(prj_dh, info, data_out_fh=data_feats_pos_fh) #FEATS PER SUBSTITUTION data_feats_sub_fh = "%s/data_feats/aas/data_feats_sub" % prj_dh data_feats_sub = get_data_feats_sub(data_out_fh=data_feats_sub_fh) #FEATS PER MUTATION data_feats_mut_fh = "%s/data_feats/aas/data_feats_mut" % prj_dh data_feats_mut = get_data_feats_mut(prj_dh, data_feats_mut_fh, info) #FEATS ALL data_feats_all_fh = "%s/data_feats/aas/data_feats_all" % prj_dh data_feats_all = get_data_feats_all(data_feats_mut_fh, data_feats_pos_fh, data_feats_sub_fh, data_feats_all_fh, info) #back compatibility feats_all_fh = "%s/data_feats/aas/feats_all" % prj_dh if not data_feats_pos is None: data_feats_pos.to_csv(feats_all_fh) logging.shutdown()
def main(prj_dh): """ **--step 0.1**. Demultipexes .fastq files based on barcodes located at `prj_dh/cfg/barcodes`. :param prj_dh: path to project directory """ logging.info("start") if not exists(prj_dh) : logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info trimmomatic_fh=info.trimmomatic_fh if exists('%s/cfg/barcodes' % prj_dh) : barcodes=pd.read_csv(prj_dh+'/cfg/barcodes') # print barcodes if len(barcodes)!=0: fastq_R1_fhs=[str(s) for s in barcodes.loc[:,'fastq_R1_fh'].unique()] fastq_fhs_list=[[s, s.replace('R1','R2')] for s in fastq_R1_fhs if not exists("%s.qcd.fastq" % s)] # pairs for fastq_fhs_tp in fastq_fhs_list: fastq2qcd(fastq_fhs_tp,trimmomatic_fh) # fastq_R1_fhs=[fastq_R1_fh+".qcd.fastq" for fastq_R1_fh in fastq_R1_fhs if not ".qcd." in fastq_R1_fh] barcodes=barcodes.set_index('fastq_R1_fh') for fastq_R1_fh in fastq_R1_fhs: if exists(fastq_R1_fh): fastq_R1_fh_barcodes=barcodes.loc[fastq_R1_fh,:] fastq_R2_fh=fastq_R1_fh.replace('R1','R2') #str(fastq_R1_fh_barcodes.ix[0,'fastq_R2_fh']) if (not exists("%s.qcd.fastq_unresolved_joined.qcd.fastq" % (fastq_R1_fh)))\ and (not exists("%s.qcd.fastq_unresolved_joined.qcd.fastq" % (fastq_R1_fh))): if exists(fastq_R1_fh): logging.info("processing: %s" % basename(fastq_R1_fh)) # print fastq_R1_fh_barcodes barcode_R1s=[str(s) for s in list(fastq_R1_fh_barcodes.loc[:,'barcode_R1'])] barcode_R2s=[str(s) for s in list(fastq_R1_fh_barcodes.loc[:,'barcode_R2'])] fastq_fns =[str(s) for s in list(fastq_R1_fh_barcodes.loc[:,'fastq_fn'])] fastq2dplx(fastq_R1_fh+".qcd.fastq",fastq_R2_fh+".qcd.fastq",\ barcode_R1s,barcode_R2s,fastq_fns) else: logging.info("fastq_R2_fh do not exist: %s" % fastq_R2_fh) else: logging.info("already done : %s" % fastq_R1_fh) else: logging.info("fastq_R1_fh do not exist: %s" % fastq_R1_fh) else: logging.info("skipping: because barcodes not present in cfg") else: logging.info("skipping: because barcodes not present in cfg") logging.shutdown()
def pipeline(prj_dh, step=None, test=False):
    """
    Runs the dms2dfe analysis steps for the project directory `prj_dh`.

    :param prj_dh: path to project directory.
    :param step: step number to run; if None, all steps are run in order.
    :param test: if True, run in single-process test mode where supported.
    """
    from dms2dfe import (configure, ana0_fastq2dplx, ana0_fastq2sbam, ana0_getfeats,
                         ana1_sam2mutmat, ana2_mutmat2fit, ana3_fit2comparison,
                         ana4_modeller, ana4_plotter)
    if exists(prj_dh):
        if step == 0 or step is None:
            configure.main(prj_dh, "deps")
            configure.main(prj_dh)
        if step == 0.1 or step is None:
            ana0_fastq2dplx.main(prj_dh)
        if step == 0.2 or step is None:
            ana0_fastq2sbam.main(prj_dh, test)
        if step == 0.3:
            ana0_getfeats.main(prj_dh)
        if step == 1 or step is None:
            ana1_sam2mutmat.main(prj_dh)
        if step == 2 or step is None:
            ana2_mutmat2fit.main(prj_dh, test)
        if step == 3 or step is None:
            ana0_getfeats.main(prj_dh)
            ana4_modeller.main(prj_dh, test)
        if step == 4 or step is None:
            ana3_fit2comparison.main(prj_dh, test)
        if step == 5 or step is None:
            ana0_getfeats.main(prj_dh)
            ana4_plotter.main(prj_dh)
        if step is None:
            logging.info("Location of output data: %s/plots/aas/data_comparison" % (prj_dh))
            logging.info("Location of output visualizations: %s/plots/aas/" % (prj_dh))
            logging.info("For information about file formats of outputs, refer to http://kc-lab.github.io/dms2dfe/io .")
    else:
        configure.main(prj_dh)
    logging.shutdown()
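# Usage sketch (the project directory name 'prj' is illustrative): run every step in order,
# or rerun a single step of the pipeline defined above.
if __name__ == '__main__':
    pipeline('prj')                       # all steps, 0 through 5
    # pipeline('prj', step=2)             # only step 2: mutation matrices -> fitness scores
    # pipeline('prj', step=4, test=True)  # step 4 in single-process test mode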
def main(prj_dh): """ **--step 0.1**. Demultipexes .fastq files based on barcodes located at `prj_dh/cfg/barcodes`. :param prj_dh: path to project directory """ logging.info("start") if not exists(prj_dh): logging.error("Could not find '%s'\n" % prj_dh) sys.exit() configure.main(prj_dh) from dms2dfe.tmp import info trimmomatic_fh = info.trimmomatic_fh if exists('%s/cfg/barcodes' % prj_dh): barcodes = pd.read_csv(prj_dh + '/cfg/barcodes') # print barcodes if len(barcodes) != 0: fastq_R1_fhs = [ str(s) for s in barcodes.loc[:, 'fastq_R1_fh'].unique() ] fastq_fhs_list = [[s, s.replace('R1', 'R2')] for s in fastq_R1_fhs if not exists("%s.qcd.fastq" % s)] # pairs for fastq_fhs_tp in fastq_fhs_list: fastq2qcd(fastq_fhs_tp, trimmomatic_fh) # fastq_R1_fhs=[fastq_R1_fh+".qcd.fastq" for fastq_R1_fh in fastq_R1_fhs if not ".qcd." in fastq_R1_fh] barcodes = barcodes.set_index('fastq_R1_fh') for fastq_R1_fh in fastq_R1_fhs: if exists(fastq_R1_fh): fastq_R1_fh_barcodes = barcodes.loc[fastq_R1_fh, :] fastq_R2_fh = fastq_R1_fh.replace( 'R1', 'R2') #str(fastq_R1_fh_barcodes.ix[0,'fastq_R2_fh']) if (not exists("%s.qcd.fastq_unresolved_joined.qcd.fastq" % (fastq_R1_fh)))\ and (not exists("%s.qcd.fastq_unresolved_joined.qcd.fastq" % (fastq_R1_fh))): if exists(fastq_R1_fh): logging.info("processing: %s" % basename(fastq_R1_fh)) # print fastq_R1_fh_barcodes barcode_R1s = [ str(s) for s in list( fastq_R1_fh_barcodes.loc[:, 'barcode_R1']) ] barcode_R2s = [ str(s) for s in list( fastq_R1_fh_barcodes.loc[:, 'barcode_R2']) ] fastq_fns = [ str(s) for s in list( fastq_R1_fh_barcodes.loc[:, 'fastq_fn']) ] fastq2dplx(fastq_R1_fh+".qcd.fastq",fastq_R2_fh+".qcd.fastq",\ barcode_R1s,barcode_R2s,fastq_fns) else: logging.info("fastq_R2_fh do not exist: %s" % fastq_R2_fh) else: logging.info("already done : %s" % fastq_R1_fh) else: logging.info("fastq_R1_fh do not exist: %s" % fastq_R1_fh) else: logging.info("skipping: because barcodes not present in cfg") else: logging.info("skipping: because barcodes not present in cfg") logging.shutdown()
def main(prj_dh, test=False, ml=False):
    """
    **--step 3**. Identifies molecular features that may determine fitness scores.

    The results are plotted as the following visualisations.

    .. code-block:: text

        ROC plots
        Relative importances of features

    :param prj_dh: path to project directory.
    """
    logging.info("start")
    if not exists(prj_dh):
        logging.error("Could not find '%s'" % prj_dh)
        sys.exit()
    configure.main(prj_dh)
    from dms2dfe.tmp import info
    from dms2dfe.lib.io_ml import corrplot
    corrplot(info)

    if ml:
        from dms2dfe.lib.io_dfs import set_index
        from dms2dfe.lib.io_ml import data_fit2ml, make_data_combo, data_combo2ml  # get_cols_del
        cores = int(info.cores)
        if hasattr(info, 'mut_type'):
            mut_type = info.mut_type
        else:
            mut_type = 'single'
        if hasattr(info, 'ml_input'):
            if info.ml_input == 'FC':
                ml_input = 'FCA_norm'
            elif info.ml_input == 'Fi':
                ml_input = 'FiA'
            else:
                ml_input = 'FCA_norm'
        else:
            ml_input = 'FCA_norm'
        type_form = "aas"
        if not exists("%s/plots/%s" % (prj_dh, type_form)):
            makedirs("%s/plots/%s" % (prj_dh, type_form))
        if not exists("%s/data_ml/%s" % (prj_dh, type_form)):
            makedirs("%s/data_ml/%s" % (prj_dh, type_form))
        data_feats = pd.read_csv("%s/data_feats/aas/data_feats_all" % (prj_dh))
        if mut_type == 'single':
            data_fit_keys = ["data_fit/%s/%s" % (type_form, basename(fh))
                             for fh in glob("%s/data_fit/aas/*" % prj_dh)
                             if (not "inferred" in basename(fh)) and ("_WRT_" in basename(fh))]
            data_fit_keys = np.unique(data_fit_keys)
            if len(data_fit_keys) != 0:
                if test:
                    pooled_io_ml(data_fit_keys[0])
                else:
                    for data_fit_key in data_fit_keys:
                        pooled_io_ml(data_fit_key)
                    # pool_io_ml=Pool(processes=int(cores))
                    # pool_io_ml.map(pooled_io_ml,data_fit_keys)
                    # pool_io_ml.close(); pool_io_ml.join()
            else:
                logging.info("already processed")
        elif mut_type == 'double':
            data_feats = set_index(data_feats, 'mutids')
            data_fit_dh = 'data_fit_dm'
            data_fit_keys = ["%s/%s/%s" % (data_fit_dh, type_form, basename(fh))
                             for fh in glob("%s/%s/aas/*" % (prj_dh, data_fit_dh))
                             if (not "inferred" in basename(fh)) and ("_WRT_" in basename(fh))]
            data_fit_keys = np.unique(data_fit_keys)
            ycol = ml_input
            Xcols = data_feats.columns
            if len(data_fit_keys) != 0:
                for data_fit_key in data_fit_keys:
                    data_fit_dm_fh = '%s/%s' % (prj_dh, data_fit_key)
                    data_combo_fh = '%s/data_ml/aas/%s.combo' % (prj_dh, basename(data_fit_dm_fh))
                    force = False
                    if not exists(data_combo_fh) or force:
                        data_fit_dm = pd.read_csv(data_fit_dm_fh).set_index('mutids')
                        data_combo = make_data_combo(data_fit_dm, data_feats, ycol, Xcols)
                        if not exists(dirname(data_combo_fh)):
                            makedirs(dirname(data_combo_fh))
                        data_combo.to_csv(data_combo_fh)
                    else:
                        data_combo = pd.read_csv(data_combo_fh).set_index('mutids')
                    logging.info('ml: start')
                    data_combo2ml(data_combo, basename(data_fit_dm_fh),
                                  dirname(data_combo_fh), dirname(data_combo_fh),
                                  ycoln=ycol, col_idx='mutids', ml_type='cls',
                                  middle_percentile_skipped=0.1, force=False)


def pooled_io_ml(data_fit_key):
    """
    Wrapper used to parallelize `dms2dfe.lib.io_ml.data_fit2ml` across data_fit files.

    :param data_fit_key: in the form <data_fit>/<aas/cds>/<name of file>.
    """
    from dms2dfe.tmp import info
    dX_fh = "%s/data_feats/aas/data_feats_all" % (info.prj_dh)
    dy_fh = '%s/%s' % (info.prj_dh, data_fit_key)
    logging.info('processing: %s' % basename(dy_fh))
    data_fit2ml(dX_fh, dy_fh, info, regORcls='cls')
    logging.shutdown()
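# Worked example (file name illustrative) of the data_fit key construction above: a file
# 'prj/data_fit/aas/sel_WRT_unsel' passes the '_WRT_'/'inferred' filters and becomes the key
# 'data_fit/aas/sel_WRT_unsel', which pooled_io_ml later joins back onto info.prj_dh.
from os.path import basename as _basename
_fh = 'prj/data_fit/aas/sel_WRT_unsel'
assert ("_WRT_" in _basename(_fh)) and ("inferred" not in _basename(_fh))
assert "data_fit/%s/%s" % ("aas", _basename(_fh)) == 'data_fit/aas/sel_WRT_unsel'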
def main(prj_dh,test=False): """ **--step 2**. Converts mutation matrices (.mat files produced in upstream ana1_sam2mutmat module) and calculates the fitness scores. The output data is saved in `data_fit` format as described in :ref:`io`. :param prj_dh: path to project directory. """ logging.info("start") if not exists(prj_dh) : logging.error("Could not find '%s'" % prj_dh) sys.exit() configure.main(prj_dh) global prj_dh_global,host,norm_type,fsta_len,cctmr_global,output_dh,prj_dh_global,lbls,Ni_cutoff,fsta_fh_global,clips from dms2dfe.tmp import info fsta_fh=info.fsta_fh cctmr=info.cctmr host=info.host cores=info.cores transform_type=info.transform_type norm_type=info.norm_type fsta_len=info.fsta_len Ni_cutoff=int(info.Ni_cutoff) rscript_fh=info.rscript_fh if hasattr(info, 'mut_type'): mut_type=info.mut_type else: mut_type='single' lbls=pd.read_csv(prj_dh+'/cfg/lbls') lbls=lbls.set_index('varname') # SET global variables prj_dh_global=prj_dh fsta_fh_global=fsta_fh if cctmr != 'nan': cctmr=[int("%s" % i) for i in cctmr.split(" ")] cctmr_global=[(cctmr[0],cctmr[1]),(cctmr[2],cctmr[3])] else: cctmr_global=None if info.clips != 'nan': clips=[int(s) for s in info.clips.split(' ')] else: clips=None if mut_type=='single': lbls_list=getusable_lbls_list(prj_dh) if len(lbls_list)!=0: if test: pooled_mut_mat_cds2data_lbl(lbls_list[0]) else: pool_mut_mat_cds2data_lbl=Pool(processes=int(cores)) pool_mut_mat_cds2data_lbl.map(pooled_mut_mat_cds2data_lbl,lbls_list) pool_mut_mat_cds2data_lbl.close(); pool_mut_mat_cds2data_lbl.join() else: logging.info("already processed: mut_mat_cds2data_lbl") #TRANSFORM if (transform_type=='rlog') or (transform_type=='vst'): logging.info("transforming frequencies: %s" % transform_type) transform_data_lbl_deseq(prj_dh,transform_type,rscript_fh) else: logging.info("transforming frequencies: %s" % transform_type) transform_data_lbl(prj_dh,transform_type) #FITNESS fits_pairs_list=getusable_fits_list(prj_dh,data_fit_dh='data_fit') if len(fits_pairs_list)!=0: if test: # pooled_data_lbl2data_fit(fits_pairs_list[0]) for fits_pairs in fits_pairs_list: pooled_data_lbl2data_fit(fits_pairs) else: pool_data_lbl2data_fit=Pool(processes=int(cores)) pool_data_lbl2data_fit.map(pooled_data_lbl2data_fit, fits_pairs_list) pool_data_lbl2data_fit.close(); pool_data_lbl2data_fit.join() else: logging.info("already processed: data_lbl2data_fit") elif mut_type=='double': fits_pairs_list_dm=getusable_fits_list(prj_dh,data_fit_dh='data_fit_dm') if len(fits_pairs_list_dm)!=0: if test: data_lbl2data_fit_dm(fits_pairs_list_dm[0],prj_dh,data_lbl_dh='data_lbl_dm', data_fit_dh='data_fit_dm') else: for fits_pairs in fits_pairs_list_dm: data_lbl2data_fit_lite(fits_pairs,prj_dh,data_lbl_dh='data_lbl_dm', data_fit_dh='data_fit_dm') else: logging.info("already processed: data_lbl2data_fit") logging.shutdown()
def test_configure():
    from dms2dfe import configure
    configure.main('prj')
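# A hedged companion sketch (not in the original test suite): the main() functions above call
# sys.exit() when the project directory is missing, so pytest can assert the SystemExit.
def test_missing_project_dir_exits(tmpdir):
    import pytest
    from dms2dfe import ana1_sam2mutmat
    with pytest.raises(SystemExit):
        ana1_sam2mutmat.main(str(tmpdir.join('does_not_exist')))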