# Writes one job script ("<cluster file>.sh") for every file whose path starts
# with the given prefix. Each generated script sets PYTHONPATH, changes to the
# BIANA administration scripts directory and runs blast_cd_hit_clusters.py on
# that cluster file, writing its output to "<cluster file>.blast.gz".

import glob
import sys
import os
import time

from database_info_parameters import parser

parser.add_option("-a", "--prefix", dest="prefix", help="Cluster files prefix")

(options, args) = parser.parse_args()

# Without a prefix there is nothing to match against: show the usage and stop
# instead of searching for files literally named "None*".
if options.prefix is None:
    parser.print_help()
    sys.exit(1)

# The matching files are collected with glob instead of a shell command, so a
# prefix containing spaces or shell metacharacters is never interpreted by a
# shell. The list is built once, before any ".sh" file is written, and sorted
# so that the scripts are generated in a stable order.
cluster_files = sorted(path for path in glob.glob(options.prefix + "*")
                       if os.path.isfile(path))

for current_file in cluster_files:
    # The "with" block guarantees the script file is closed even if a write fails.
    with open(current_file + ".sh", 'w') as shf:
        #shf.write("/bin/bash\n")
        #shf.write("source /sbi/users/jgarcia/.bashrc\n")
        shf.write("setenv PYTHONPATH \"/sbi/users/jgarcia/biana/src\"\n")
        shf.write("cd /sbi/users/jgarcia/biana/scripts/administration\n")
        shf.write("python blast_cd_hit_clusters.py -n %s -c %s -i %s -f %s.blast.gz -s 3729884308 -l 632558808656\n"
                  % (options.dbname, options.dbhost, current_file, current_file))

    # Queue submission is intentionally left disabled:
    #os.system("qsub -q sbi %s.sh" %current_file)
# Command-line script: opens a BIANA database connection and hands the file
# given with -i/--input-file to BianaDBaccess._insert_blast_results_file.

import biana.BianaDB
import sys

from database_info_parameters import parser

parser.add_option("-i", "--input-file",
                  dest="input_file",
                  default="",
                  help="Sequence file")

(options, args) = parser.parse_args()

# Connection settings are taken from the options of the shared parser.
connection_settings = {
    "dbname": options.dbname,
    "dbuser": options.dbuser,
    "dbhost": options.dbhost,
    "dbpassword": options.dbpass,
    "lock_tables": True,
}

dbaccess = biana.BianaDB.BianaDBaccess(**connection_settings)

blast_results_path = options.input_file
dbaccess._insert_blast_results_file(file_path=blast_results_path)

dbaccess.close()
import os import gzip import sys import re from database_info_parameters import parser import biana.BianaObjects import biana.BianaDB parser.add_option("-t","--type", dest="type", help = "proteinsequence or nucleotidesequence", default="") parser.add_option("--conserve-temporary-files", dest="temporaryfiles", help = "If \"yes\", it does not delete temporary files") (options, args) = parser.parse_args() if options.type is None or options.dbname is None: parser.print_help() sys.exit(1) temporal_all = "all_sequences.temp" temporal_removed = "unique_sequences.temp.gz" def remove_duplicated_sequences_from_file(input_file,output_file): """ Removes all duplicates in a sequence file with the format sequenceID Sequence For duplicated sequences, it takes the smallest sequenceID """
# Command-line script: creates a BIANA database through
# biana.administration.create_biana_database, using the connection options of
# the shared parser plus an optional description.

from database_info_parameters import parser
import biana

parser.add_option("-d", "--description",
                  dest="description",
                  default="",
                  help="BIANA database description")

(options, args) = parser.parse_args()

if options.dbname is not None:
    creation_settings = {
        "dbname": options.dbname,
        "dbuser": options.dbuser,
        "dbhost": options.dbhost,
        "dbpassword": options.dbpass,
        "description": options.description,
    }
    biana.administration.create_biana_database(**creation_settings)
else:
    # No database name was given: only show the usage message.
    parser.print_help()
from database_info_parameters import parser import biana.BianaDB import biana.BianaObjects.sequenceUtilities import sys import gzip import os TEMP_FASTA_FILE = "temp_sequences_file.fasta" TEMP_CLSTR_PREFIX = "./temp_sequence_clusters" TEMP_CLSTR_FILE = TEMP_CLSTR_PREFIX + ".clstr" TEMP_BLAST_RESULTS_FILE = "./blast_results.txt.gz" parser.add_option("-t", "--type", dest="type", help="proteinsequence or nucleotidesequence", default="") parser.add_option("--cd-hit-identity-cluster", dest="cdhit_threshold", help="See CD-HIT documentation", default="") parser.add_option("--conserve-temporary-files", dest="temporaryfiles", help="If \"yes\", it does not delete temporary files") (options, args) = parser.parse_args() if options.type is None or options.dbname is None:
# Script entry point: connects to the BIANA database described by the shared
# command-line options and passes the -i/--input-file path to
# BianaDBaccess._insert_blast_results_file.

import biana.BianaDB
import sys

from database_info_parameters import parser

parser.add_option(
    "-i",
    "--input-file",
    dest="input_file",
    help="Sequence file",
    default="",
)

(options, args) = parser.parse_args()

# Map each BianaDBaccess keyword onto the option attribute that supplies it.
db_settings = {}
for keyword, option_name in (("dbname", "dbname"),
                             ("dbuser", "dbuser"),
                             ("dbhost", "dbhost"),
                             ("dbpassword", "dbpass")):
    db_settings[keyword] = getattr(options, option_name)
db_settings["lock_tables"] = True

dbaccess = biana.BianaDB.BianaDBaccess(**db_settings)
dbaccess._insert_blast_results_file(file_path=options.input_file)
dbaccess.close()
# Command-line script: opens a BIANA database connection and passes the
# clusters file given with -i/--input-file to
# sequenceUtilities.insert_cd_hit_clusters_to_biana_database.

from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

parser.add_option("-i", "--input-file",
                  dest="input_file",
                  default="",
                  help="CD HIT clusters file")

(options, args) = parser.parse_args()

# Connection settings are taken from the options of the shared parser.
connection_settings = dict(
    dbhost=options.dbhost,
    dbuser=options.dbuser,
    dbpassword=options.dbpass,
    dbname=options.dbname,
    lock_tables=True,
)

dbaccess = biana.BianaDB.BianaDBaccess(**connection_settings)

clusters_path = options.input_file
biana.BianaObjects.sequenceUtilities.insert_cd_hit_clusters_to_biana_database(
    cd_hit_clusters_file=clusters_path,
    dbaccess=dbaccess,
)

dbaccess.close()
# Script entry point: connects to the BIANA database described by the shared
# command-line options, then calls
# sequenceUtilities.insert_cd_hit_clusters_to_biana_database with the
# -i/--input-file path and the open connection.

from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

parser.add_option(
    "-i",
    "--input-file",
    dest="input_file",
    help="CD HIT clusters file",
    default="",
)

(options, args) = parser.parse_args()

dbaccess = biana.BianaDB.BianaDBaccess(dbhost=options.dbhost,
                                       dbuser=options.dbuser,
                                       dbpassword=options.dbpass,
                                       dbname=options.dbname,
                                       lock_tables=True)

# The helper is looked up only after the connection exists, then called with
# keyword arguments collected in one place.
insert_clusters = biana.BianaObjects.sequenceUtilities.insert_cd_hit_clusters_to_biana_database
insertion_arguments = {
    "cd_hit_clusters_file": options.input_file,
    "dbaccess": dbaccess,
}
insert_clusters(**insertion_arguments)

dbaccess.close()
import os import gzip import sys import re from database_info_parameters import parser import biana.BianaObjects import biana.BianaDB parser.add_option("-t", "--type", dest="type", help="proteinsequence or nucleotidesequence", default="") parser.add_option( "--conserve-temporary-files", dest="temporaryfiles", help='If "yes", it does not delete temporary files' ) (options, args) = parser.parse_args() if options.type is None or options.dbname is None: parser.print_help() sys.exit(1) temporal_all = "all_sequences.temp" temporal_removed = "unique_sequences.temp.gz" def remove_duplicated_sequences_from_file(input_file, output_file): """ Removes all duplicates in a sequence file with the format sequenceID Sequence For duplicated sequences, it takes the smallest sequenceID """
# It creates a formatted blast database with all protein sequences in the database
# CD Hit results are also inserted in database

from database_info_parameters import parser
import biana.BianaDB
import biana.BianaObjects.sequenceUtilities

import sys
import gzip
import os

# Names of the temporary files used by this script.
TEMP_FASTA_FILE = "temp_sequences_file.fasta"
TEMP_CLSTR_PREFIX = "./temp_sequence_clusters"
TEMP_CLSTR_FILE = TEMP_CLSTR_PREFIX+".clstr"
TEMP_BLAST_RESULTS_FILE = "./blast_results.txt.gz"

parser.add_option("-t", "--type", dest="type",
                  help="proteinsequence or nucleotidesequence", default="")

parser.add_option("--cd-hit-identity-cluster", dest="cdhit_threshold",
                  help="See CD-HIT documentation", default="")

parser.add_option("--conserve-temporary-files", dest="temporaryfiles",
                  help="If \"yes\", it does not delete temporary files")

(options, args) = parser.parse_args()

# "--type" is declared with default="", so it is never None when omitted;
# test for an empty value so that a missing sequence type is rejected here.
if not options.type or options.dbname is None:
    parser.print_help()
    sys.exit(1)

# First, gets FASTA file