Ejemplo n.º 1
0
    def match_fasta(self):
        """Select a FASTA file and an annotation file, then match them.

        Lists the files found under ``self.par['dir_out']``, asks the user
        (via ``mySystem.system().select_key``) to pick one ``.fa``/``.fasta``
        file and one ``.gtf``/``.gff3`` file, and stores the choices in
        ``self.par['match_fa']`` and ``self.par['match_gtf']``. The matching
        routine is then chosen by ``self.par['web_site']``; any value other
        than 'ENSEML' or 'NCBI' results in no matching call.
        """
        # Materialize once: the listing is scanned twice below.
        files = list(myIO.dir_os(self.par['dir_out']).incrusive_files())

        #select a fasta file
        # Real lists (not lazy filter objects) so the selector gets a
        # sequence on both Python 2 and Python 3.
        fa_files = [x for x in files if x.endswith(('.fa', '.fasta'))]
        self.par['match_fa'] = mySystem.system().select_key(fa_files)

        #select a gtf or gff file
        gtf_files = [x for x in files if x.endswith(('.gtf', '.gff3'))]
        self.par['match_gtf'] = mySystem.system().select_key(gtf_files)

        #match
        # Read the parameters from self.par (where they were just written)
        # rather than from a module-level ``par`` that may not exist.
        web_site = self.par['web_site']
        if web_site == 'ENSEML':
            myGenome.genome(self.par['match_fa']).match_ensembl_fa(
                self.par['match_gtf'])
        elif web_site == 'NCBI':
            myGenome.genome(self.par['match_fa']).match_ncbi_fa(
                self.par['match_gtf'])
Ejemplo n.º 2
0
    def phipseq_alignment(self, sample_name):
        """Run the alignment and read-counting steps for a single sample.

        A shallow copy of ``self.par`` is extended with the sample's name,
        directory, raw-file list and output file names, and that copy is
        handed to ``myAlign.alignment``. The steps that run are controlled
        by the 'phip_alignment' and 'phip_counting' parameters. At the end
        the timing record from ``mySystem.system().get_time`` is written to
        the sample log file.

        sample_name: key used to look up ``self.par['sample_dirs']`` and
            the 'sample_to_raw' parameter.
        """
        banner = '\n######Anslysis of {} will be trigerred!#####'
        print(banner.format(sample_name))

        # per-sample parameters: top-level assignments below do not touch
        # self.par because this is a separate dict
        cfg = dict(self.par)
        cfg['start_time'] = time.time()
        cfg['sample_name'] = sample_name

        # sample directory
        cfg['sample_dir'] = myIO.dir_os(
            self.par['sample_dirs'][sample_name]).create_dir()
        print('\tSample directory: ', cfg['sample_dir'])

        # raw data files, comma separated
        raw_files = cfg['sample_to_raw'][sample_name]
        cfg['sample_raw_files'] = ','.join(raw_files)
        print('\tRaw files: ', cfg['sample_raw_files'])

        # every output file name is the common prefix plus a suffix
        head = cfg['sample_dir'] + sample_name
        cfg['file_head'] = head
        output_suffixes = (
            ('sample_sam_file', '.sam'),                      # alignment output
            ('sample_RC_file', '_RC.txt'),                    # read counts
            ('sample_pro_sumRC_file', '_pro_sumRC.txt'),
            ('sample_pro_maxRC_file', '_pro_maxRC.txt'),
            ('sample_saturation_file', '_saturation.txt'),    # saturation analysis
            ('sample_log', '.log'),                           # sample log
        )
        for key, suffix in output_suffixes:
            cfg[key] = head + suffix

        # sequence alignment (only bowtie1 is dispatched here)
        if cfg['phip_alignment'] == 'yes':
            aligner = cfg['tool_aligner']
            print("\n###sequence alignment", aligner)
            if aligner == 'bowtie1':
                myAlign.alignment(cfg).bowtie1_alignment()

        # read counting: by peptide, then by protein when an annotation
        # file is configured
        if cfg['phip_counting'] == 'yes':
            myAlign.alignment(cfg).count_reads()
            if 'file_annotation' in self.par:
                self.combine_peptides(cfg)

        # update sample log
        elapsed = mySystem.system().get_time(cfg['start_time'])
        elapsed['sample_name'] = sample_name
        log_file = myIO.file_os(cfg['sample_log'], '=')
        log_file.line_replace(elapsed)
Ejemplo n.º 3
0
 def genome_annot(self):
     """Download genome sequence or annotation data from the chosen site.

     Based on ``self.par['web_site']`` ('ENSEML' or 'NCBI'), the user is
     asked to select a data type, which is stored in
     ``self.par['data_type']``. The matching ``myDownload`` downloader is
     built from ``self.par['specie']`` and ``self.par['dir_out']``;
     'dna_fa' triggers ``download_dna()``, any other type triggers
     ``download_annot(data_type)``. Other web sites are ignored.
     """
     # Use self.par throughout: the selection is written there, so reading
     # it back from a module-level ``par`` only worked by aliasing.
     par = self.par
     site = par['web_site']
     if site == 'ENSEML':
         data_types = ['dna_fa', 'genome_CDS', 'gtf', 'gff', 'protein']
         make_downloader = myDownload.ensembl
     elif site == 'NCBI':
         data_types = ['dna_fa', 'RNA', 'gff', 'protein']
         make_downloader = myDownload.NCBI
     else:
         # no download is defined here for other sites
         return

     #1: select data type
     par['data_type'] = mySystem.system().select_key(data_types)

     #2: download
     downloader = make_downloader(par['specie'], par['dir_out'])
     if par['data_type'] == 'dna_fa':
         downloader.download_dna()
     else:
         downloader.download_annot(par['data_type'])
Ejemplo n.º 4
0
 def download_idmapping(self):
     """Download and unpack one id-mapping file chosen by the user.

     Lists the entries under ``self.url`` +
     'knowledgebase/idmapping/by_organism/', keeps the names containing
     '.dat.', lets the user pick one, downloads it into ``self.out_dir``
     and passes the local file to ``decompress_gz()``.

     Returns whatever ``decompress_gz()`` returns for the downloaded file.
     """
     #get web file list
     url_idmapping = self.url + 'knowledgebase/idmapping/by_organism/'
     _, web_files = web(url_idmapping).ls_html()

     #select file
     # sorted() returns a list on both Python 2 and 3; calling .sort() on
     # the result of filter() fails on Python 3, where filter() is lazy.
     file_names = sorted(
         name for name in web_files.values() if '.dat.' in name)
     file_name = mySystem.system().select_key(file_names, 'Select web file')

     #download idmapping dat file
     url_file = url_idmapping + file_name
     local_file = self.out_dir + file_name
     web(url_file).download_file(local_file)

     #decompress file
     ungz_file = myIO.file_os(local_file).decompress_gz()
     print('Save ', url_file, ' as ', ungz_file)
     return ungz_file
Ejemplo n.º 5
0
    #check python version
    # Compare the structured version tuple instead of parsing the first
    # character of sys.version, and enforce the 3.4 minimum that the
    # message below states.
    if sys.version_info < (3, 4):
        print(sys.version)
        print(
            '\nError: The version of python required for the pipeline running is at least v3.4.~\n'
        )
        # sys.exit (not the non-existent sys.exits) terminates with status 2
        sys.exit(2)

    ########################################
    #read variables.txt
    # The first command-line argument is the path of the variables file,
    # parsed into a dict with '=' as the separator argument.
    var_file = os.path.abspath(sys.argv[1])
    par = myIO.file_os(var_file, '=').to_dict()
    par['file_var'] = var_file
    #initiate parameters, directories and files
    par = launch_phip(par).init_par()

    ######################################
    #main loop
    bioPHIPfunc.phip(par).main_loop()
    ######################################

    #end
    # start_time is expected to be set earlier, outside this fragment
    times_dict = mySystem.system().get_time(start_time)
    myIO.file_os(par['file_total_log'], '=').line_replace(times_dict)
    print('\n\nDuration: ', times_dict['duration'])

    print('\n\nGreat! It is done!!!\n\n\n')
Ejemplo n.º 6
0
    #initiate dictionary saving parameters
    # 'in_out' starts as 'Continue', the value the selection loop below
    # tests for.
    par = {
        'in_out': 'Continue'
    }
    # NOTE(review): download_annot receives this same dict object;
    # presumably it keeps a reference so later updates are visible - confirm.
    annot = download_annot(par)
    ########################################
    #

    #2: download dir
    par['dir_home'] = myIO.dir_os('/home/yuan/data_preparation/').stdin_dir(
        'Enter the directory path storing downloads files')
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(par['dir_home'])
    while (par['in_out'] == 'Continue'):
        #2:select ftp or web site
        web_sites = ['NCBI', 'ENSEML', 'UniProt']
        par['web_site'] = mySystem.system().select_key(
            web_sites, 'Select public database')
        par['dir_out'] = par['dir_home'] + par['web_site'] + '/'
        #1: select file types
        if par['web_site'] in ['NCBI', 'ENSEML']:
            operations = ['Genome annotation', 'match fasta and gtf']
            par['operations'] = mySystem.system().select_key(
                operations, 'What is your operations')
        elif par['web_site'] == 'UniProt':
            par['operations'] = 'UniProt idmapping'

        #download genome annotations
        if par['operations'] == 'Genome annotation':
            #2: select specie
            species = ['human', 'mouse', 'rat', 'maize']
            par['specie'] = mySystem.system().select_key(
                species, 'Select specie of genome')