Beispiel #1
0
import sys
import os
from database_info_parameters import parser

# Extend the shared command-line parser with the option that selects which
# cluster files this script works on.
parser.add_option(
    "-a",
    "--prefix",
    dest="prefix",
    help="Cluster files prefix"
)

# Parsed values are read from ``options`` by the rest of the script.
options, args = parser.parse_args()

import time

import glob

# Write one job script ("<file>.sh") next to every file whose name starts
# with the requested prefix.
#
# The prefix is expanded in-process with glob instead of piping through "ls"
# in a shell, so spaces or characters such as ";" in the prefix are never
# interpreted as shell syntax.  sorted() keeps the iteration order
# deterministic.
# NOTE(review): scripts generated by an earlier run also start with the
# prefix and are matched again on a re-run, exactly as with the original
# "ls" call -- confirm whether they should be filtered out.
for current_file in sorted(glob.glob("%s*" % options.prefix)):
    # "ls" would have listed the *contents* of a matching directory; skip
    # anything that is not a regular file instead.
    if not os.path.isfile(current_file):
        continue

    # The context manager closes the script even if one of the writes fails.
    with open(current_file + ".sh", 'w') as shf:
        #shf.write("/bin/bash\n")
        #shf.write("source /sbi/users/jgarcia/.bashrc\n")
        shf.write("setenv PYTHONPATH \"/sbi/users/jgarcia/biana/src\"\n")
        shf.write("cd /sbi/users/jgarcia/biana/scripts/administration\n")
        shf.write("python blast_cd_hit_clusters.py -n %s -c %s -i %s -f %s.blast.gz -s 3729884308 -l 632558808656\n" % (options.dbname, options.dbhost, current_file, current_file))

    # Submission to the queue is currently disabled:
    #os.system("qsub -q sbi %s.sh" %current_file)

Beispiel #2
0
import biana.BianaDB
import sys
from database_info_parameters import parser

# Register the option carrying the path of the file to load.
parser.add_option("-i",
                  "--input-file",
                  dest="input_file",
                  help="Sequence file",
                  default="")

(options, args) = parser.parse_args()

# Open the database connection from the shared connection options;
# lock_tables=True is passed through to BianaDBaccess.
dbaccess = biana.BianaDB.BianaDBaccess(dbname=options.dbname,
                                       dbuser=options.dbuser,
                                       dbhost=options.dbhost,
                                       dbpassword=options.dbpass,
                                       lock_tables=True)

# Close the connection even when the insert raises, so a failed load does
# not leave the connection open.
try:
    dbaccess._insert_blast_results_file(file_path=options.input_file)
finally:
    dbaccess.close()
import os
import gzip
import sys
import re

from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

# Script-specific options added on top of the shared connection options.
parser.add_option("-t", "--type", dest="type",
                  help="proteinsequence or nucleotidesequence", default="")

parser.add_option("--conserve-temporary-files", dest="temporaryfiles",
                  help="If \"yes\", it does not delete temporary files")

(options, args) = parser.parse_args()

# "--type" defaults to "" rather than None, so an "is None" test can never
# detect that it was omitted; test for emptiness instead.  The same test
# also covers a database name that is None or empty.
if not options.type or not options.dbname:
    parser.print_help()
    sys.exit(1)

# File names used for intermediate data (the "temp" in the names suggests
# scratch files; their use lies outside this section).
temporal_all = "all_sequences.temp"
temporal_removed = "unique_sequences.temp.gz"


def remove_duplicated_sequences_from_file(input_file,output_file):
        """
        Removes all duplicates in a sequence file with the format sequenceID    Sequence

        For duplicated sequences, it takes the smallest sequenceID
        """
Beispiel #4
0
from database_info_parameters import parser
import biana


# Optional free-text description stored with the new database.
parser.add_option(
    "-d",
    "--description",
    dest="description",
    help="BIANA database description",
    default=""
)

options, args = parser.parse_args()

# A database name is mandatory: create the database when it is given,
# otherwise just show the usage text.
if options.dbname is not None:
    biana.administration.create_biana_database(
        dbname=options.dbname,
        dbuser=options.dbuser,
        dbhost=options.dbhost,
        dbpassword=options.dbpass,
        description=options.description
    )
else:
    parser.print_help()

Beispiel #5
0
from database_info_parameters import parser
import biana.BianaDB
import biana.BianaObjects.sequenceUtilities
import sys
import gzip
import os

# Names of the working files used by this script; the cluster file name is
# derived from the cluster prefix.
TEMP_FASTA_FILE = "temp_sequences_file.fasta"
TEMP_CLSTR_PREFIX = "./temp_sequence_clusters"
TEMP_CLSTR_FILE = "%s.clstr" % TEMP_CLSTR_PREFIX
TEMP_BLAST_RESULTS_FILE = "./blast_results.txt.gz"

# Script-specific options added on top of the shared connection options.
parser.add_option("-t", "--type", dest="type",
                  help="proteinsequence or nucleotidesequence", default="")

parser.add_option("--cd-hit-identity-cluster", dest="cdhit_threshold",
                  help="See CD-HIT documentation", default="")

parser.add_option("--conserve-temporary-files", dest="temporaryfiles",
                  help='If "yes", it does not delete temporary files')

options, args = parser.parse_args()

if options.type is None or options.dbname is None:
import biana.BianaDB
import sys
from database_info_parameters import parser

# Register the option carrying the path of the file to load.
parser.add_option("-i", "--input-file", dest="input_file",
                  help="Sequence file", default="")

(options, args) = parser.parse_args()

# Open the database connection from the shared connection options;
# lock_tables=True is passed through to BianaDBaccess.
dbaccess = biana.BianaDB.BianaDBaccess(dbname=options.dbname,
                                       dbuser=options.dbuser,
                                       dbhost=options.dbhost,
                                       dbpassword=options.dbpass,
                                       lock_tables=True)

# Close the connection even when the insert raises, so a failed load does
# not leave the connection open.
try:
    dbaccess._insert_blast_results_file(file_path=options.input_file)
finally:
    dbaccess.close()

from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

# Register the option carrying the path of the CD-HIT clusters file.
parser.add_option("-i",
                  "--input-file",
                  dest="input_file",
                  help="CD HIT clusters file",
                  default="")

(options, args) = parser.parse_args()

# Open the database connection from the shared connection options;
# lock_tables=True is passed through to BianaDBaccess.
dbaccess = biana.BianaDB.BianaDBaccess(dbhost=options.dbhost,
                                       dbuser=options.dbuser,
                                       dbpassword=options.dbpass,
                                       dbname=options.dbname,
                                       lock_tables=True)

# Close the connection even when the insert raises, so a failed load does
# not leave the connection open.
try:
    biana.BianaObjects.sequenceUtilities.insert_cd_hit_clusters_to_biana_database(
        cd_hit_clusters_file=options.input_file, dbaccess=dbaccess)
finally:
    dbaccess.close()
from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

# Register the option carrying the path of the CD-HIT clusters file.
parser.add_option("-i", "--input-file", dest="input_file", help="CD HIT clusters file", default="")

(options, args) = parser.parse_args()

# Open the database connection from the shared connection options;
# lock_tables=True is passed through to BianaDBaccess.
dbaccess = biana.BianaDB.BianaDBaccess(
    dbhost=options.dbhost, dbuser=options.dbuser, dbpassword=options.dbpass, dbname=options.dbname, lock_tables=True
)

# Close the connection even when the insert raises, so a failed load does
# not leave the connection open.
try:
    biana.BianaObjects.sequenceUtilities.insert_cd_hit_clusters_to_biana_database(
        cd_hit_clusters_file=options.input_file, dbaccess=dbaccess
    )
finally:
    dbaccess.close()
import os
import gzip
import sys
import re

from database_info_parameters import parser
import biana.BianaObjects
import biana.BianaDB

# Script-specific options added on top of the shared connection options.
parser.add_option("-t", "--type", dest="type", help="proteinsequence or nucleotidesequence", default="")

parser.add_option(
    "--conserve-temporary-files", dest="temporaryfiles", help='If "yes", it does not delete temporary files'
)

(options, args) = parser.parse_args()

# "--type" defaults to "" rather than None, so an "is None" test can never
# detect that it was omitted; test for emptiness instead.  The same test
# also covers a database name that is None or empty.
if not options.type or not options.dbname:
    parser.print_help()
    sys.exit(1)

# File names used for intermediate data (the "temp" in the names suggests
# scratch files; their use lies outside this section).
temporal_all = "all_sequences.temp"
temporal_removed = "unique_sequences.temp.gz"


def remove_duplicated_sequences_from_file(input_file, output_file):
    """
        Removes all duplicates in a sequence file with the format sequenceID    Sequence

        For duplicated sequences, it takes the smallest sequenceID
        """
# Creates a formatted BLAST database from all protein sequences in the database.
# CD-HIT results are also inserted into the database.

from database_info_parameters import parser
import biana.BianaDB
import biana.BianaObjects.sequenceUtilities
import sys
import gzip
import os

# Names of the working files used by this script; the cluster file name is
# derived from the cluster prefix.
TEMP_FASTA_FILE = "temp_sequences_file.fasta"
TEMP_CLSTR_PREFIX = "./temp_sequence_clusters"
TEMP_CLSTR_FILE = TEMP_CLSTR_PREFIX + ".clstr"
TEMP_BLAST_RESULTS_FILE = "./blast_results.txt.gz"

# Script-specific options added on top of the shared connection options.
parser.add_option("-t", "--type", dest="type",
                  help="proteinsequence or nucleotidesequence", default="")

parser.add_option("--cd-hit-identity-cluster", dest="cdhit_threshold",
                  help="See CD-HIT documentation", default="")

parser.add_option("--conserve-temporary-files", dest="temporaryfiles",
                  help="If \"yes\", it does not delete temporary files")

(options, args) = parser.parse_args()

# "--type" defaults to "" rather than None, so an "is None" test can never
# detect that it was omitted; test for emptiness instead.  The same test
# also covers a database name that is None or empty.
if not options.type or not options.dbname:
    parser.print_help()
    sys.exit(1)


# First, gets FASTA file