Exemple #1
0
 def __init__(self, edge_list_file, outdir, log_file):
     """Store the inputs, load the edge list and immediately run ``main()``.

     Args:
         edge_list_file: Value stored as ``self.edge_list_file``.
         outdir: Prefix for the clustering table path
             (``outdir + '/louv_clusering.tab'``).
         log_file: Passed to ``getLog`` to obtain the "Clustering" logger.
     """
     self.edge_list_file = edge_list_file

     # Keep the logger in a local so the start message is emitted through
     # the very same object that is stored on the instance.
     clustering_logger = getLog(log_file, "Clustering")
     self.cl_log = clustering_logger
     clustering_logger.info("CLUSTERING started ...")

     # The edge list is read before the output path is set (original order).
     self.edge_list = self.getEdgeListFromFile()

     table_path = outdir + '/louv_clusering.tab'
     self.clustering_outTab = table_path

     self.main()
Exemple #2
0
 def __init__(self, TH_path, input_fasta, outFasta, THthreads, log_file):
     """Store the settings, derive output paths and run both processing steps.

     Args:
         TH_path: Value stored as ``self.TH_path``.
         input_fasta: Value stored as ``self.input_fasta``.
         outFasta: Output FASTA path; also the prefix for the ``.tab`` and
             ``_all_monomersTH.fasta`` paths.
         THthreads: Value stored as ``self.threads``.
         log_file: Passed to ``getLog`` to obtain the "TideHunter" logger.
     """
     self.log_th = getLog(log_file, "TideHunter")

     # Plain inputs.
     self.TH_path = TH_path
     self.input_fasta = input_fasta
     self.threads = THthreads

     # Output locations, all derived from the same prefix.
     self.outFasta = outFasta
     self.outTab = outFasta + '.tab'
     self.outFasta_all_monomersTH = outFasta + '_all_monomersTH.fasta'

     # Everything is configured; run the two steps in their original order.
     self._run_TH()
     self._tab2fasta()
Exemple #3
0
    def __init__(self, out_dir, dir_clust, dir_canu, dir_reblast, opt_delete, log_file):
        """Store the directory paths and the delete option, then call ``del_dir()``.

        Args:
            out_dir: Value stored as ``self.out_dir``.
            dir_clust: Value stored as ``self.outdir_clust``.
            dir_canu: Value stored as ``self.outdir_canu``.
            dir_reblast: Value stored as ``self.outdir_reblast``.
            opt_delete: Value stored as ``self.opt_delete``.
            log_file: Passed to ``getLog`` to obtain the "DELETE" logger.
        """
        self.out_dir = out_dir
        self.outdir_clust = dir_clust
        self.outdir_canu = dir_canu
        self.outdir_reblast = dir_reblast

        delete_logger = getLog(log_file, "DELETE")
        self.del_log = delete_logger

        self.opt_delete = opt_delete
        self.del_dir()

        # Final message is logged only after del_dir() has returned.
        delete_logger.info("Exit.......\n Finished the work")
Exemple #4
0
 def __init__(self, TRF, consensus_name, outdir, log_name):
     """Store the settings, derive the ReBlast paths and run the three steps.

     Args:
         TRF: Value stored as ``self.run_TRF``.
         consensus_name: Value stored as ``self.consensus_name``.
         outdir: Output directory; ``outdir + '/ReBlast/'`` becomes
             ``self.dir_trf``.
         log_name: Passed to ``getLog`` to obtain the 'TRF' logger.
     """
     # All derived paths share this prefix.
     reblast_dir = outdir + '/ReBlast/'

     self.outdir = outdir
     self.dir_trf = reblast_dir
     self.run_TRF = TRF
     self.consensus_name = consensus_name
     self.file_num = reblast_dir + '/TRF_seq_dr.fasta'
     self.filt_trf = reblast_dir + '/seqFilt_trf.fasta'

     trf_logger = getLog(log_name, 'TRF')
     self.TRF_log = trf_logger
     trf_logger.info("Module Run_TRF has started the job...")

     # Steps run in their original order.
     self.createdir()
     self.TRF()
     self.filt_tr()
Exemple #5
0
 def __init__(self, clustering_outTab, singleton_list, outdir, reads, THall, minAbundancy, log_file):
     """Store the inputs, derive output table paths and run ``main()``.

     Args:
         clustering_outTab: Value stored as ``self.clustering_outTab``.
         singleton_list: Value stored as ``self.singletonR``.
         outdir: Prefix for ``louv_clust_filtering.tab`` and
             ``clust_abund.tab``.
         reads: Value stored as ``self.reads``.
         THall: Value stored as ``self.THall_monomers``.
         minAbundancy: Value stored as ``self.minAbundancy``.
         log_file: Passed to ``getLog`` to obtain the "Filtering" logger.
     """
     self.minAbundancy = minAbundancy
     self.reads = reads
     self.singletonR = singleton_list
     self.clustering_outTab = clustering_outTab

     # Output tables written under outdir.
     self.filtering_outTab = outdir + '/louv_clust_filtering.tab'
     self.clust_abund = outdir + '/clust_abund.tab'

     filtering_logger = getLog(log_file, "Filtering")
     self.filt_log = filtering_logger
     filtering_logger.info("Filtering and preparing file with monomer sequences has started...")

     # createListRep() runs before THall_monomers is assigned (original order).
     rep_list = self.createListRep()
     self.list_Rep = rep_list
     self.THall_monomers = THall

     self.main(rep_list)
 def __init__(self, canu, filtering_outTab, singleton_list, outFasta, outdir, log_file, min_overlap, consensus_name):
     """Store the inputs, derive working directories and run the assembly steps.

     Args:
         canu: Value stored as ``self.canuRun``.
         filtering_outTab: Value stored as ``self.filtering_outTab``.
         singleton_list: Value stored as ``self.singleton_list``.
         outFasta: Value stored as ``self.outFasta``.
         outdir: Output directory; ``clusters/`` and ``canu/`` subdirectory
             paths are derived from it.
         log_file: Passed to ``getLog`` to obtain the "Consensus assembly"
             logger.
         min_overlap: Value stored as ``self.min_overlap``.
         consensus_name: Value stored as ``self.consensus_name``.
     """
     self.filtering_outTab = filtering_outTab
     self.min_overlap = min_overlap
     self.consensus_name = consensus_name
     self.outdir = outdir
     self.outFasta = outFasta

     # Working directories below outdir.
     self.outdir_clust = outdir + '/clusters/'
     self.outdir_canu = outdir + '/canu/'

     self.canuRun = canu
     self.singleton_list = singleton_list

     assembly_logger = getLog(log_file, "Consensus assembly")
     self.canu_log = assembly_logger
     assembly_logger.info("CONSENSUS ASSEMBLY has started...")

     # Steps run in their original order; runCanu()'s result is kept.
     self.createfile()
     self.dirFile_canu = self.runCanu()
     self.writeFileCan()
Exemple #7
0
 def __init__(self, blast_run, makedb, threads, word_size, trf_file, outdir,
              abund_f, perc_abund, log_file):
     """Store the settings, derive output paths and run the reclustering steps.

     Args:
         blast_run: Value stored as ``self.blast_run``.
         makedb: Value stored as ``self.makedb``.
         threads: Value stored as ``self.threads``.
         word_size: Value stored as ``self.word_size``.
         trf_file: Value stored as ``self.trf_file``.
         outdir: Output directory; every output path is derived from it.
         abund_f: Value stored as ``self.abund_f``.
         perc_abund: Value stored as ``self.perc_abund``.
         log_file: Passed to ``getLog`` to obtain the 'Reclustering' logger.
     """
     self.blast_run = blast_run
     self.makedb = makedb
     self.outdir = outdir
     self.threads = threads
     self.word_size = word_size
     self.trf_file = trf_file

     # Paths inside the ReBlast working directory.
     reblast_dir = outdir + '/ReBlast/'
     self.outdir_reblast = reblast_dir
     self.out_blast = reblast_dir + 'blast_sec.tab'
     self.out_clust = reblast_dir + '/seq_clust.clst'

     # Paths directly under the output directory.
     self.nanoTRF = outdir + '/TR_nanotrf.fasta'
     self.end_nano = outdir + '/nanoTRF.fasta'

     self.Reclust_log = getLog(log_file, 'Reclustering')
     self.abund_f = abund_f
     self.perc_abund = perc_abund
     self.nanoTRF_abund = outdir + '/abund_nanotrf.tab'

     # Each step feeds the next one; order is unchanged.
     self.BLAST()
     parsed_hits = self.Blast_parsing()
     self.list_BLAST = parsed_hits
     clustered = self.createGraph(parsed_hits)
     self.fasta_clust = clustered
     self.filt_clust(clustered)
     self.nano_end()
Exemple #8
0
def main():
    """Run the whole nanoTRF pipeline and clean up the output directory.

    Stages, in order: read preparation, TideHunter (or, when ``args.run_th``
    is set, loading of existing TideHunter output through ``without_TH``),
    BLAST, clustering, cluster filtering, consensus assembly, TRF and
    reclustering. Each stage object does its work in its constructor; this
    function only passes the resulting paths on to the next stage.

    Cleanup: ``*html`` files in the output directory are always removed.
    Unless ``args.dir_cleanup`` is set, the clusters, canu and ReBlast
    directories are deleted as well, together with every file in the output
    directory that is not one of the result files or the log.
    """
    args = get_cmdline_args()
    w_TH = args.run_th
    outDirectory = '{0}/'.format(checkDir_or_create(args.out_directory))
    reads = args.reads
    log_file = outDirectory + args.log_file
    LOG = getLog(log_file, 'nanoTRF')
    LOG.info("nanoTRF started...")

    # ---- TideHunter parameters ----
    # TideHunter output is converted from tab to FASTA (outTH_fasta_name);
    # sequence ids look like: >readName*repN*consLen*copyNum
    TH_path = args.path_TH
    threads = args.threads
    outTH_fasta_name = outDirectory + "TH.out.fasta"

    # ---- BLAST parameters ----
    blast_run = args.blast
    makedb = args.makedb
    outFile = outDirectory + "blast.out"
    wordsize = args.wordsize
    evalue = args.evalue

    # ---- Clustering / filtering parameters ----
    minAbundancy = args.max_abundancy

    # ---- Canu parameters ----
    canu = args.canu
    min_overlap = args.min_Overlap
    consensus_name = outDirectory + args.consensus_name

    # ---- TRF parameters ----
    path_TR = args.TRF_run

    # ---- Reclustering parameters ----
    wordsize_f = args.wordsize_f
    perc_abund = args.perc_abund

    # ---- Read preparation ----
    read_data = read_preparation.PrepareReads(reads)

    # ---- TideHunter ----
    if args.run_th:
        # Existing TideHunter results were supplied: w_TH[0] is handed to
        # without_TH and w_TH[1] is used as the all-monomers file.
        run_data = without_TH.without_TH(w_TH[0], outTH_fasta_name, log_file)
        TH_all_monomers = w_TH[1]
        TH_outFasta = run_data.outFasta
    else:
        TH_data = run_TideHunter.TideHunter_run(TH_path, read_data.read_file,
                                                outTH_fasta_name, threads, log_file)
        TH_all_monomers = TH_data.outFasta_all_monomersTH
        TH_outFasta = TH_data.outFasta

    # ---- BLAST ----
    blast_module_data = run_BLAST.run_BLAST(blast_run, makedb, TH_outFasta, outFile,
                                            threads, wordsize, evalue, log_file)
    edge_list_after_blast_file = blast_module_data.edge_list_file
    singleton_list = blast_module_data.not_blast

    # ---- Clustering ----
    louv_module_data = Louv_clustering.LouvClustering(edge_list_after_blast_file,
                                                      outDirectory, log_file)
    clustering_outTab = louv_module_data.clustering_outTab

    # ---- Filtering ----
    Filt_data = FilterRep.FilteringLouvTab(clustering_outTab, singleton_list, outDirectory,
                                           reads, TH_all_monomers, minAbundancy, log_file)
    tableFilt = Filt_data.filtering_outTab
    abund_tab = Filt_data.clust_abund

    # ---- Canu ----
    consensus_out = Consensus_Assembly.ConsAssembly(canu, tableFilt, singleton_list,
                                                    TH_outFasta, outDirectory, log_file,
                                                    min_overlap, consensus_name)
    dir_clust = consensus_out.outdir_clust
    dir_canu = consensus_out.outdir_canu

    # ---- TRF ----
    TRF_out = Run_TRF.Run_TRF(path_TR, consensus_name, outDirectory, log_file)
    re_blast = TRF_out.dir_trf
    trf_seq = TRF_out.filt_trf

    # ---- Reclustering ----
    Reclustering.Reclustering(blast_run, makedb, threads, wordsize_f, trf_seq,
                              outDirectory, abund_tab, perc_abund, log_file)

    # ---- Cleanup ----
    # Remove "*html" files without going through a shell, so an output path
    # containing spaces or shell metacharacters cannot break or hijack the
    # command. Dot-files are skipped because the shell glob skipped them too.
    for name in os.listdir(outDirectory):
        if name.endswith('html') and not name.startswith('.'):
            html_path = outDirectory + name
            if not os.path.isdir(html_path):
                os.remove(html_path)

    del_log = getLog(log_file, "DELETE")
    if not args.dir_cleanup:
        del_log.info("Removing directories has started...")
        # Delete the entire canu, clusters and ReBlast directory trees.
        shutil.rmtree(dir_canu)
        shutil.rmtree(dir_clust)
        shutil.rmtree(re_blast)

        # Files that must survive the cleanup. The previous condition chained
        # "!=" tests with "or", which is true for every name, so all files
        # (results and log included) were deleted. The configured log file
        # name is kept too, in case it is not the default 'loging.log'.
        keep = {
            'nanoTRF.fasta',
            'TH.out.fasta',
            'TH.out.fasta.tab',
            'TR_info.tab',
            'loging.log',
            args.log_file,
        }
        for file_t in os.listdir(outDirectory):
            if file_t in keep:
                continue
            path_t = outDirectory + file_t
            # os.remove() cannot delete directories; leave them untouched.
            if os.path.isdir(path_t):
                continue
            os.remove(path_t)
    else:
        del_log.info("Directories are not removed")
Exemple #9
0
 def __init__(self, blast_run, makedb, inFile, outFile, threads, wordsize, evalue, log_file):
     """Store the BLAST settings, derive output paths and run ``main()``.

     Args:
         blast_run: Value stored as ``self.blast_run``.
         makedb: Value stored as ``self.makedb``.
         inFile: Value stored as ``self.inFile``.
         outFile: Value stored as ``self.outFile``; also the prefix of the
             ``_notBlast.list`` and ``edges.list`` paths.
         threads: Value stored as ``self.threads``.
         wordsize: Value stored as ``self.wordsize``.
         evalue: Value stored as ``self.evalue``.
         log_file: Passed to ``getLog`` to obtain the "BLAST module" logger.
     """
     self.blast_run = blast_run
     self.makedb = makedb
     self.inFile = inFile
     self.outFile = outFile
     self.threads = threads
     self.wordsize = wordsize
     self.evalue = evalue

     self.bl_log = getLog(log_file, "BLAST module")

     # Both derived paths are plain string concatenations on outFile.
     self.not_blast = outFile + "_notBlast.list"
     self.edge_list_file = outFile + "edges.list"

     self.main()