import os

# Input taxa list (alternate lists kept for reference)
#taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/test_genomes_02062016.tsv")
taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/genome_lists/genomes_euks_Fungi_Protists_02082016.tsv")
#taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/genome_lists/test_genomes_02062016.tsv")
#taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/Sacch_with_headers.tsv")


output_path = os.path.expanduser("~/data_analysis/data/genome_assemblies/assemblies_02152016")
seq_downloader = os.path.expanduser("~/data_analysis/code/bioinformatics-toolbox/src/seq_convert_genbank.pl")

# Parameters (consumed later in the script)
parse_headers = 0
taxa_col = 0  # column of the taxa file holding the taxon name

# Load taxa file data
print(tistamp(1) + "\tReading taxa...")
taxa = []
with open(taxa_file) as inputfile:
    for line in inputfile:
        # The input lists are .tsv files, so split rows on tabs, not commas
        taxa.append(line.strip().split('\t'))

validated_taxa = 0
reset_validation = 1
taxa_i = 0
skip_to = 0   # first row to process (supports resuming a partial run)
end_at = 999  # last row to process
# Parse data (the first row is a header)
for taxon in taxa[1:]:
    taxa_i = taxa_i + 1

    # Skip ahead / stop early; the excerpt is truncated at this point, so
    # this resume logic is reconstructed from skip_to / end_at above
    if taxa_i < skip_to:
        continue
    if taxa_i > end_at:
        break
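
# Note: both examples call tistamp() without defining it; it is presumably a
# timestamp helper defined elsewhere in the source script. A minimal sketch of
# what such a helper might look like (an assumption, not the original code):
import time

def tistamp(mode=1):
    # Hypothetical reconstruction: mode 1 gives a log-style timestamp,
    # anything else a compact date stamp.
    if mode == 1:
        return time.strftime("[%Y-%m-%d %H:%M:%S]")
    return time.strftime("%Y%m%d")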
Example #2
    "~/data_analysis/data/genome_assemblies/genome_lists/genomes_euks_Fungi_Protists_02082016.tsv"
)
#taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/genome_lists/test_genomes_02062016.tsv")
#taxa_file = os.path.expanduser("~/data_analysis/data/genome_assemblies/Sacch_with_headers.tsv")

output_path = os.path.expanduser(
    "~/data_analysis/data/genome_assemblies/assemblies_02152016")
seq_downloader = os.path.expanduser(
    "~/data_analysis/code/bioinformatics-toolbox/src/seq_convert_genbank.pl")

# Parameters
parse_headers = 0
taxa_col = 0

# Load taxa file data
print(tistamp(1) + "\tReading taxa...")
taxa = []
with open(taxa_file) as inputfile:
    for line in inputfile:
        taxa.append(line.strip().split(','))

validated_taxa = 0
reset_validation = 1
taxa_i = 0
skip_to = 0
end_at = 999
# Parse data
for taxon in taxa[1:]:
    taxa_i = taxa_i + 1

    # Skip ahead
print("Cache: " + str(clear_cache))
'''
Relevant blastn options (as documented in `blastn -help`):
 -word_size <Integer, >=4>
   Word size for wordfinder algorithm (length of best perfect match)
 -gapopen <Integer>
   Cost to open a gap
 -gapextend <Integer>
   Cost to extend a gap
 -penalty <Integer, <=0>
   Penalty for a nucleotide mismatch
 -reward <Integer, >=0>
   Reward for a nucleotide match
'''
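
# Illustrative addition (not in the original script): a helper assembling a
# blastn command line from the options documented above. The numeric values
# used as defaults here (word_size 11, gapopen 5, gapextend 2, penalty -3,
# reward 2) are the stock defaults of the "blastn" task; the output format
# choice is this sketch's own.
def build_blastn_cmd(query, db, out,
                     word_size=11, gapopen=5, gapextend=2,
                     penalty=-3, reward=2):
    return ["blastn",
            "-query", query, "-db", db, "-out", out,
            "-word_size", str(word_size),
            "-gapopen", str(gapopen), "-gapextend", str(gapextend),
            "-penalty", str(penalty), "-reward", str(reward),
            "-outfmt", "6"]  # tabular output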

# Load query file names (query_file here is the path to a list of query
# files, defined earlier in the script)
print(tistamp(1) + "\tReading queries...")
queries = []
with open(query_file) as inputfile:
    for line in inputfile:
        queries.append(line.strip().split(','))

# Load database file names (db_file is likewise defined earlier in the script)
print(tistamp(1) + "\tReading databases...")
databases = []
with open(db_file) as inputfile:
    for line in inputfile:
        databases.append(line.strip().split(','))

# Start main loop of query sequence files
for query_file in queries:
    # Each parsed row is a list; element 0 is the query file name itself
    query_file = query_file[0]
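
    # The excerpt ends mid-loop. A plausible continuation (an assumption, not
    # the original author's code): run this query against every database with
    # the blastn options documented above, via the helper sketched earlier.
    import subprocess  # would normally be imported at the top of the script
    for database in databases:
        db_path = database[0]
        out_path = query_file + ".blastn.tsv"  # hypothetical output name
        subprocess.run(build_blastn_cmd(query_file, db_path, out_path),
                       check=True)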