def __init__(self, fastq, target_fa,
             output_dir='/home/swoo/AWS/output_pyspark_testing/'):
    """Set up the assembly objects and the per-process output path.

    Args:
        fastq: Path of the input reads file. Its base name, cut at the
            first dot, becomes part of ``self.output_path``.
        target_fa: Path of the target FASTA file handed to the assembly
            model.
        output_dir: Directory under which ``self.output_path`` is built.
            The default is the location that used to be hard-coded, so
            existing two-argument callers get the same path as before.
    """
    self.fastq = fastq
    self.target_fa = target_fa

    # NOTE(review): both dictionaries are filled by the same
    # parseTargetList() call; confirm the length dictionary is not meant
    # to come from a different helper.
    self.target_cnt_dic = self.parseTargetList()
    self.target_len_dic = self.parseTargetList()

    # `path` is resolved from module scope; it is not a parameter here.
    view = Assm.AssemblyView(path)
    model = Assm.AssemblyModel(fastq, target_fa, view)
    self.Assm_controller = Assm.AssemblyController(model, view)

    self.pos_str = ''
    # '*' is the effective value; a preceding '\n' assignment was
    # overwritten immediately and has been dropped as a dead store.
    self.newline_str = '*'

    # Output name is "<input stem>_<pid>" (presumably so concurrent worker
    # processes do not share an output path -- confirm).
    input_stem = os.path.basename(self.fastq).split('.')[0]
    self.output_path = os.path.join(
        output_dir, input_stem + '_' + str(os.getpid()))
def parseAndProcessCSVPyspark(self, line):
    """Assemble one molecule from a single input record.

    Args:
        line: One text line of the input file. The loader is chosen from
            the extension of ``self.fastq`` (``.csv`` or ``.tsv``).

    Returns:
        ``''`` when the line is skipped: header line (starts with
        ``'Features'``), unsupported file extension, a positive
        ``checkInsufficientSixmers`` / ``checkInsufficientTargets`` result,
        an ambiguous target, or an ``XXX`` target while
        ``param.show_XXX_targets()`` is false. Otherwise the tuple
        ``(FASTA_str, origin_gene, cov_list, VCF_list)``.
    """
    if line.startswith('Features'):
        return ''  # header line

    # Choose the loader first so that no model is built for a file type we
    # cannot read.
    if self.fastq.endswith('.csv'):
        load_record = self.putIntoMolecule
    elif self.fastq.endswith('.tsv'):
        load_record = self.putIntoMoleculeTSV
    else:
        # Unknown file format: use the same '' sentinel as every other
        # skip path instead of an implicit None.
        return ''

    controller = self.Assm_controller
    assm_model = controller.Assm_model

    molecule_model = Assm.MoleculeModel(assm_model)
    load_record(line, molecule_model)

    if assm_model.ParseModel.checkInsufficientSixmers(molecule_model):
        return ''

    origin_gene, MTM_ambig = controller.determineTargetGene(molecule_model)
    # Exact comparison kept on purpose: the type returned by
    # determineTargetGene is not visible from here.
    if MTM_ambig == True:  # noqa: E712
        controller.MTM_removed_cnt += 1
        return ''
    if origin_gene.startswith('XXX') and not param.show_XXX_targets():
        controller.XXX_removed_cnt += 1
        return ''
    # Check if we have enough candidate targets.
    if assm_model.ParseModel.checkInsufficientTargets(
            assm_model, molecule_model, origin_gene):
        return ''

    # Build the mutation graph for the chosen gene, then estimate coverage.
    molecule_model.initMutationGraph(origin_gene)
    controller.estimateAllCov(molecule_model, origin_gene)

    # NOTE(review): the original layout lost its indentation, so it is not
    # certain whether trimming / insertion calling also belong under the
    # blind-mutation switch. Only FindMutations is gated here -- confirm.
    if param.enable_blind_mu():
        controller.FindMutations(molecule_model, origin_gene)
    molecule_model.Graph.GraphTrimming(assm_model.trimming_threshold)
    molecule_model.Graph.CallInsertions(assm_model.trimming_threshold)

    if param.fast_path():
        molecule_model.Graph.grdQualityPath(0)
    else:
        molecule_model.Graph.optPath()

    # Update info; the position argument is always the empty string here.
    self.updateInfo(molecule_model, '')

    FASTA_str = self.genFASTAstr(molecule_model.Graph, self.pos_str, origin_gene)
    cov_list = self.genCOVstr(molecule_model.Graph)
    # The VCF slot is a fixed single-empty-string list in this code path.
    VCF_list = ['']

    return FASTA_str, origin_gene, cov_list, VCF_list
def __init__(self, fastq, target_fa, path_inst):
    """Create the assembly view, model and controller and keep all three.

    Args:
        fastq: Input reads path, forwarded to ``Assm.AssemblyModel``.
        target_fa: Target FASTA path, forwarded to ``Assm.AssemblyModel``.
        path_inst: Object forwarded unchanged to ``Assm.AssemblyView``.
    """
    # Each object is stored on the instance as soon as it is built, in the
    # order view -> model -> controller.
    view = self.Assm_view = Assm.AssemblyView(path_inst)
    model = self.Assm_model = Assm.AssemblyModel(fastq, target_fa, view)
    self.Assm_controller = Assm.AssemblyController(model, view)
target_fa = 'target_sequences_genomic_dna.fa' target_fa = 'target_sequences_genomic_dna_XXX.fa' target_fa = 'target_sequences_barcode_test_short.fa' target_fa = 'target_sequences_genomic_dna_XXX_sep_wrong.fa' #target_fa = 'target_sequences_barcode_KRAS_COSM518_diff.fa' #target_fa = 'target_sequences_barcode_KRAS_COSM522_diff.fa' #target_fa = 'target_sequences_barcode_KRAS_COSM532_diff.fa' target_fa = 'target_sequences_genomic_dna_XXX_sep.fa' target_fa = 'target_sequences_barcode_test.fa' elif len(sys.argv) < 3: print('Insufficient arguments') print('Usage: python XXX.py <FASTQ> <Target_FASTA>') exit() elif len(sys.argv) >= 3: fastq = sys.argv[1] target_fa = sys.argv[2] fastq = os.path.join(path.input_dir, fastq) target_fa = os.path.join(path.input_dir, target_fa) Assm_view = Assm.AssemblyView(path) Assm_model = Assm.AssemblyModel(fastq, target_fa, Assm_view) Assm_controller = Assm.AssemblyController(Assm_model, Assm_view) start = time.time() Assm_controller.Process() # Core module end = time.time() Assm_view.print_time(start, end)