예제 #1
0
 def __init__(self, fastq, target_fa):
     """Set up the assembly objects and the per-run output settings.

     Args:
         fastq: Path of the input read file. Its base name, cut at the
             first '.', becomes part of the output directory name.
         target_fa: Path of the target FASTA file handed to the assembly
             model.
     """
     self.fastq = fastq
     self.target_fa = target_fa

     # NOTE(review): both tables are filled by separate calls to the same
     # parseTargetList(); confirm that the count table and the length
     # table are really meant to start from identical data.
     self.target_cnt_dic = self.parseTargetList()
     self.target_len_dic = self.parseTargetList()

     # NOTE(review): `path` is not a parameter; it is looked up in the
     # enclosing (module) scope when the constructor runs.
     view = Assm.AssemblyView(path)
     model = Assm.AssemblyModel(fastq, target_fa, view)
     self.Assm_controller = Assm.AssemblyController(model, view)

     self.pos_str = ''
     # '*' is the value that was actually in effect: a preceding
     # assignment of '\n' was overwritten immediately and has been dropped.
     self.newline_str = '*'

     # One output directory per (input file, process): the process id keeps
     # concurrently running processes from sharing a directory name.
     run_name = os.path.basename(self.fastq).split('.')[0]
     self.output_path = '/home/swoo/AWS/output_pyspark_testing/' + run_name + '_' + str(
         os.getpid())
예제 #2
0
    def parseAndProcessCSVPyspark(self, line):
        """Assemble one molecule from a single line of the input file.

        The line is loaded into a fresh molecule model, assigned to a target
        gene, run through coverage estimation (plus mutation finding when
        ``param.enable_blind_mu()`` is on), and a path through its graph is
        selected before the output strings are generated.

        Args:
            line: One text line of the input file. The parser is chosen by
                the extension of ``self.fastq`` ('.csv' or '.tsv').

        Returns:
            ``''`` when the line is a header (starts with ``'Features'``) or
            the molecule is filtered out; ``None`` when ``self.fastq`` has
            neither a '.csv' nor a '.tsv' extension; otherwise the tuple
            ``(FASTA_str, origin_gene, cov_list, VCF_list)``, where
            ``VCF_list`` is always ``['']``.
        """
        if line.startswith('Features'):
            return ''  # header line, nothing to assemble

        controller = self.Assm_controller
        assm_model = controller.Assm_model
        molecule_model = Assm.MoleculeModel(assm_model)

        if self.fastq.endswith('.csv'):
            self.putIntoMolecule(line, molecule_model)
        elif self.fastq.endswith('.tsv'):
            self.putIntoMoleculeTSV(line, molecule_model)
        else:
            # NOTE(review): an unknown file format yields None, while every
            # other skip path yields ''. Kept unchanged because callers may
            # depend on it; confirm and unify if they do not.
            return

        if assm_model.ParseModel.checkInsufficientSixmers(molecule_model):
            return ''

        origin_gene, MTM_ambig = controller.determineTargetGene(molecule_model)
        # Explicit comparison kept on purpose: the type returned by
        # determineTargetGene is not visible here, so plain truthiness could
        # behave differently.
        if MTM_ambig == True:
            controller.MTM_removed_cnt += 1
            return ''
        if origin_gene.startswith('XXX') and not param.show_XXX_targets():
            controller.XXX_removed_cnt += 1
            return ''
        if assm_model.ParseModel.checkInsufficientTargets(assm_model, molecule_model, origin_gene):
            return ''  # not enough candidate targets

        # Original author's note: Charlie's Mutation-Graph-factory.
        molecule_model.initMutationGraph(origin_gene)

        controller.estimateAllCov(molecule_model, origin_gene)

        if param.enable_blind_mu():
            controller.FindMutations(molecule_model, origin_gene)
            molecule_model.Graph.GraphTrimming(assm_model.trimming_threshold)
            molecule_model.Graph.CallInsertions(assm_model.trimming_threshold)

        if param.fast_path():
            molecule_model.Graph.grdQualityPath(0)
        else:
            molecule_model.Graph.optPath()

        # Update info; the position argument is always the empty string here.
        self.updateInfo(molecule_model, '')

        FASTA_str = self.genFASTAstr(molecule_model.Graph, self.pos_str, origin_gene)
        cov_list = self.genCOVstr(molecule_model.Graph)
        VCF_list = ['']  # placeholder: no VCF content is produced in this path
        return FASTA_str, origin_gene, cov_list, VCF_list
예제 #3
0
 def __init__(self, fastq, target_fa, path_inst):
     """Create the assembly view, model and controller and keep all three.

     Args:
         fastq: Input read file, forwarded to ``Assm.AssemblyModel``.
         target_fa: Target FASTA file, forwarded to ``Assm.AssemblyModel``.
         path_inst: Object forwarded to ``Assm.AssemblyView``.
     """
     # Build the collaborators in dependency order: the model needs the
     # view, and the controller needs both.
     view = Assm.AssemblyView(path_inst)
     model = Assm.AssemblyModel(fastq, target_fa, view)
     controller = Assm.AssemblyController(model, view)

     self.Assm_view = view
     self.Assm_model = model
     self.Assm_controller = controller
예제 #4
0
        target_fa = 'target_sequences_genomic_dna.fa'
        target_fa = 'target_sequences_genomic_dna_XXX.fa'
        target_fa = 'target_sequences_barcode_test_short.fa'
        target_fa = 'target_sequences_genomic_dna_XXX_sep_wrong.fa'
        #target_fa = 'target_sequences_barcode_KRAS_COSM518_diff.fa'
        #target_fa = 'target_sequences_barcode_KRAS_COSM522_diff.fa'
        #target_fa = 'target_sequences_barcode_KRAS_COSM532_diff.fa'
        target_fa = 'target_sequences_genomic_dna_XXX_sep.fa'
        target_fa = 'target_sequences_barcode_test.fa'

    elif len(sys.argv) < 3:
        print('Insufficient arguments')
        print('Usage: python XXX.py <FASTQ> <Target_FASTA>')
        exit()
    elif len(sys.argv) >= 3:
        fastq = sys.argv[1]
        target_fa = sys.argv[2]

    fastq = os.path.join(path.input_dir, fastq)
    target_fa = os.path.join(path.input_dir, target_fa)

    Assm_view = Assm.AssemblyView(path)
    Assm_model = Assm.AssemblyModel(fastq, target_fa, Assm_view)
    Assm_controller = Assm.AssemblyController(Assm_model, Assm_view)

    start = time.time()
    Assm_controller.Process()  # Core module
    end = time.time()
    Assm_view.print_time(start, end)