Beispiel #1
0
def run():
    """BLAST the step-6 peptides against the known sequences and store hits.

    Runs BLASTp on ``./02-pipeline/step6.fna``, parses the result file,
    passes every hit to ``mysqlpop.annotated`` and finally writes all hits to
    ``./02-pipeline/step7.csv``.
    """
    file_step1 = './02-pipeline/step6.fna'

    # Retrieve known sequences.
    knownseqfile = known()

    # Check if a BLAST DB of step7_knownseq already exists; make it if not.
    blastdb = blast.check(knownseqfile)

    # BLAST unique mature peptides against known sequences.
    blastoutfile = './02-pipeline/step7_blastp.csv'
    blast.blastp(blastdb, file_step1, blastoutfile)

    # Parse the BLASTp output file.
    # Each row is [cds_id, known sequence, %ID, length, evalue].
    print('Parsing BLASTp output...\n')
    blastout = [[str(hit[0]), hit[1], hit[2], hit[3], hit[4]]
                for hit in blast.parse(blastoutfile)]

    # Populate the Annotated table with the BLASTp results; the value returned
    # by mysqlpop.annotated for each hit is summed to report the total.
    # NOTE(review): the messages below mention both SQLite and MySQL -- confirm
    # which backend mysqlpop actually targets before rewording them.
    print('Populating the Annotated table in the SQLite DB.')
    count = sum(mysqlpop.annotated(hit) for hit in blastout)
    print('{} hits have been entered in the SQLite annotated table.'.format(
        count))
    print('DATA ENTRY INTO MYSQL ANNOTATED TABLE IS COMPLETE')

    filename = './02-pipeline/step7.csv'
    header = ['cds_id', 'knownNP id', 'PID', 'length', 'evalue']
    output.csv(filename, header, blastout)
    print('The BLASTp results were written to {}.\n'.format(filename))
Beispiel #2
0
def _fail_stage(stage_message, error):
    """Report a failed pipeline stage and exit with a non-zero status.

    stage_message: text printed to stdout before exiting, or None for none.
    error: the exception that aborted the stage; its repr goes to stderr.
    """
    if stage_message is not None:
        print(stage_message)
    sys.stderr.write("Error: {!r}\n".format(error))
    sys.exit(1)


def main():
    """Run every stage of the program in order.

    Stages: argument validation, BLASTp, MUSCLE, domain search and plotting
    of the BLASTp results, followed by clean-up. When a stage raises, the
    stage's failure message is printed, the error is written to stderr and
    the process exits with status 1 (a plain ``sys.exit()`` would report
    success to the caller).
    """

    # Argument validation

    try:
        query, subject, prosite, cov, ide = ctrl_arguments()
        es_fasta(query)
        results()
    except Exception as err:
        # `except Exception` lets a SystemExit raised by the helpers above
        # propagate with its own exit status.
        _fail_stage(None, err)

    # Blastp

    try:
        print("EJECUTANDO MODULO BLASTP...")
        multifasta = bl.convert(subject)
        db = bl.data_base(multifasta)
        dict_query = bl.dictionary(query)
        output = bl.blastp(query, db)
        bl.blastp_final(output, cov, ide, dict_query)
        print("\n~ Se ha realizado Blastp\n")
    except Exception as err:
        _fail_stage("\n~ No se ha realizado Blastp\n", err)

    # Muscle

    try:
        print("EJECUTANDO MODULO MUSCLE...")
        mu.muscle()
        print("\n~ Se ha realizado Muscle\n")
    except Exception as err:
        _fail_stage("\n~ No se ha realizado Muscle\n", err)

    # Domains

    try:
        print("EJECUTANDO MODULO DOMINIOS...")
        db = do.parseo_dat(prosite)
        dict_pattern = do.dictionary_patterns(db)
        do.search_pattern(dict_pattern)
        print("\n~ Se ha realizado la busqueda de dominios\n")
    except Exception as err:
        _fail_stage("\n~ No se ha realizado la busqueda de dominios\n", err)

    # Blastp plots

    try:
        print("EJECUTANDO MODULO GRAFICACIÓN...")
        gr.result_blast(output, cov, ide, dict_query)
        gr.graph_blast()
        gr.archivos_heatmap()
        gr.graph_heatmap()
        print("\n~ Se ha realizado la graficacion de los resultados"
              " del Blastp")
    except Exception as err:
        _fail_stage("\n~ No se ha realizado la graficacion de los resultados"
                    " del Blastp", err)

    # Remove leftover files

    clear()

    # Finish

    message = (""" 
-------------------------------------------------------------\n
	    SE HA EJECUTADO EL PROGRAMA CON EXITO\n
-------------------------------------------------------------
	""")
    print(message)
Beispiel #3
0
# Validate the argument count first, so that a missing argument produces the
# usage error below instead of an IndexError on sys.argv.
# Optional arguments: coverage and identity thresholds.
# NOTE(review): the defaults are ints while command-line values stay strings;
# confirm bs.finalblast accepts both before converting either side.
if len(sys.argv) == 3:
    cov = 50
    identity = 25
elif len(sys.argv) == 5:
    cov = sys.argv[3]
    identity = sys.argv[4]
else:
    print("Error: number of arguments introduced is not valid")
    # NOTE(review): presumably a usage printer defined elsewhere in this
    # file; if none exists this resolves to the interactive builtin help().
    help()
    # Was `sys.exist()`, which raised AttributeError instead of exiting.
    sys.exit(1)

# Mandatory arguments
query = sys.argv[1]
subject = sys.argv[2]


# Run blast.py:
bs.multifasta(subject)
bs.database()
dicti = bs.dictionary(query)
output = bs.blastp(query)
bs.finalblast(cov, identity, dicti)

# Run muscle.py:
mc.muscle()

# Run dominios.py:
a = dm.parsear()
dictionary = dm.dicti()
dm.search(dictionary)
Beispiel #4
0
        try:
            # Close the handle deterministically: the file may be removed in
            # the handler below, which fails on some platforms while the file
            # is still open.
            # NOTE: pickle.load executes arbitrary code from the file; only
            # load cache files written by this script.
            with open(blast_all_pickle, 'rb') as pickle_file:
                subject_info_allDB = pickle.load(pickle_file)
        except (EOFError, KeyError) as e:
            # A bare `repr(e)` discards its result; print it so the reason
            # for redoing the BLAST is visible.
            print(repr(e))
            print('Removing pickle file, redoing blast')
            os.remove(blast_all_pickle)
# Use `if not ...` instead of `else`: the pickle load above may have failed
# and removed the file, in which case the BLAST has to be redone.
if not developing or not os.path.isfile(blast_all_pickle):
    print('blasting against all')
    all_db_path = script_path + 'databases' + os.sep + 'allDB_' + organism_name
    blast.makeBLASTdb(
        script_path + 'fasta_files' + os.sep + organism_name +
        '_all_proteins_smaller_than_500aa.fasta',
        all_db_path,
        blast_folder)  # make all proteins database
    blast_records = blast.blastp(
        interest_proteins_path,
        all_db_path,
        args['eval_all'],
        blast_folder,
        script_path + 'blast_results/' + organism_name +
        '_blastpAllOutput.xml')
    subject_info_allDB = blast.getSubjectInfo(blast_records,
                                              proteins_of_interest,
                                              args['eval_all'])
    if developing:
        print('saving in ' + blast_all_pickle)
        if not os.path.exists(script_path + 'blast_results'):
            os.makedirs(script_path + 'blast_results')
        # The context manager flushes and closes the cache file; an unclosed
        # handle can leave a truncated pickle behind for the next run.
        with open(blast_all_pickle, 'wb') as f:
            pickle.dump(subject_info_allDB, f)
print('len subject_info_allDB after < ' + str(args['all_size']) +
      'a.a. Protein blast: ' + str(len(subject_info_allDB)) + '\n')

# Copy of the all-DB results under a separate name.
subject_info_total = dict(subject_info_allDB.items())
Beispiel #5
0
    os.makedirs(script_path+'blast_results')
# Cache file for the BLAST-against-all results; its name includes the e-value.
blast_all_pickle = (script_path + 'blast_results' + os.sep +
                    fix_file_names(organism_name + '_blast_all_' +
                                   str(args['eval_all']) + '.p'))
if developing:
    if os.path.isfile(blast_all_pickle):
        print('BLAST records with this e-value against ALL database already '
              'exists, loading: ' + blast_all_pickle)
        try:
            # Close the handle deterministically: the file may be removed in
            # the handler below, which fails on some platforms while the file
            # is still open.
            # NOTE: pickle.load executes arbitrary code from the file; only
            # load cache files written by this script.
            with open(blast_all_pickle, 'rb') as pickle_file:
                subject_info_allDB = pickle.load(pickle_file)
        except (EOFError, KeyError) as e:
            # A bare `repr(e)` discards its result; print it so the reason
            # for redoing the BLAST is visible.
            print(repr(e))
            print('Removing pickle file, redoing blast')
            os.remove(blast_all_pickle)
# Use `if not ...` instead of `else`: the load above may have failed and
# removed the file, in which case the BLAST has to be redone.
if not developing or not os.path.isfile(blast_all_pickle):
    print('blasting against all')
    all_db_path = script_path + 'databases' + os.sep + 'allDB_' + organism_name
    blast.makeBLASTdb(
        script_path + 'fasta_files' + os.sep + organism_name +
        '_all_proteins_smaller_than_500aa.fasta',
        all_db_path,
        blast_folder)  # make all proteins database
    blast_records = blast.blastp(
        interest_proteins_path,
        all_db_path,
        args['eval_all'],
        blast_folder,
        script_path + 'blast_results/' + organism_name +
        '_blastpAllOutput.xml')
    subject_info_allDB = blast.getSubjectInfo(
        blast_records, proteins_of_interest, args['eval_all'])
    if developing:
        print('saving in ' + blast_all_pickle)
        if not os.path.exists(script_path + 'blast_results'):
            os.makedirs(script_path + 'blast_results')
        # The context manager flushes and closes the cache file; an unclosed
        # handle can leave a truncated pickle behind for the next run.
        with open(blast_all_pickle, 'wb') as f:
            pickle.dump(subject_info_allDB, f)
print('len subject_info_allDB after < ' + str(args['all_size']) +
      'a.a. Protein blast: ' + str(len(subject_info_allDB)) + '\n')

#if developing:
#    for subject in subject_info_allDB:                                                          # Loop over all
    #    print subject_info_allDB[subject]['query_title']
#        for prot in not_found:
#            if prot.lower() in subject.lower():
#                print('found in allDb')