def trim_pairs(file1, file2, src_dir, dst_dir, clip="", headcrop=13, quality=20,
               windowsize=4, minlen=36, crop=None):
    """Run Trimmomatic in paired-end (PE) mode on one pair of read files.

    The jar location is taken from the ``TRIMOMMATIC`` environment variable,
    which is expanded by the shell that runs the command.

    :param file1: name of the first read file, relative to ``src_dir``
    :param file2: name of the second read file, relative to ``src_dir``
    :param src_dir: directory holding the input files
    :param dst_dir: directory receiving the paired and unpaired outputs
    :param clip: text inserted verbatim before the other trimming steps
    :param headcrop: value for ``HEADCROP``
    :param quality: value used for both ``LEADING`` and ``TRAILING``
    :param windowsize: window size for ``SLIDINGWINDOW``
    :param minlen: value for ``MINLEN``
    :param crop: when truthy, adds a ``CROP:<crop>`` step
    :raises AssertionError: if ``TRIMOMMATIC`` is not defined
    """
    assert "TRIMOMMATIC" in os.environ, "environment variable not defined: TRIMOMMATIC"

    def unpaired_name(filename):
        # Same naming as before for "*.fastq.gz" inputs.
        renamed = filename.replace(".fastq.gz", "_unpaired.fastq.gz")
        if renamed == filename:
            # Any other extension (e.g. ".fq.gz") used to leave the name
            # untouched, so paired and unpaired outputs shared one path.
            head, tail = os.path.split(filename)
            stem, dot, ext = tail.partition(".")
            renamed = os.path.join(head, stem + "_unpaired" + dot + ext)
        return renamed

    cmdcrop = ("CROP:" + str(crop)) if crop else ""
    # NOTE(review): the SLIDINGWINDOW quality threshold is hard-coded to 20
    # and does not follow ``quality`` -- confirm this is intended.
    cmd = (
        "java -jar $TRIMOMMATIC PE "
        "{src}/{r1} {src}/{r2} "
        "{dst}/{r1} {dst}/{r1u} "
        "{dst}/{r2} {dst}/{r2u} "
        "{clip} {crop} HEADCROP:{headcrop} "
        "LEADING:{quality} TRAILING:{quality} SLIDINGWINDOW:{windowsize}:20 MINLEN:{minlen}"
    ).format(clip=clip, quality=quality, windowsize=windowsize, minlen=minlen,
             headcrop=headcrop, dst=dst_dir, src=src_dir, r1=file1, r2=file2,
             r1u=unpaired_name(file1), r2u=unpaired_name(file2), crop=cmdcrop)
    execute(cmd)
def psipred(fasta, path="./", cmd="/opt/psipred/runpsipred", cpus=multiprocessing.cpu_count()):
    """Run PSIPRED on a fasta file and collect its prediction strings.

    The command ``<cmd> <fasta> <cpus>`` is executed with ``path`` as working
    directory; afterwards the file named like ``fasta`` with ``.fasta``
    replaced by ``.horiz`` is parsed.

    Example of the ``.horiz`` layout being parsed::

        # PSIPRED HFORMAT (PSIPRED V3.5)

        Conf: 928999937998289999999999961696258972566893341566778999832667
        Pred: CEEEEEEECCCCCHHHHHHHHHHHHHHCCCEEEEEECCCCCCCCCCCCCCEEEEEECCCC
          AA: PKALIVYGSTTGNTEYTAETIARQLANAGYEVDSRDAASVEAGGLFEGFDLVLLGCSTWG
                  10        20        30        40        50        60

    :param fasta: fasta file handed to the predictor
    :param path: working directory for the command
    :param cmd: predictor executable
    :param cpus: third argument given to the executable
    :return: tuple ``(pred, conf)`` with the concatenated ``Pred:`` and
        ``Conf:`` line contents
    """
    execute(" ".join((cmd, fasta, str(cpus))), wd=path)

    pred_chunks = []
    conf_chunks = []
    with open(fasta.replace(".fasta", ".horiz")) as handle:
        for line in handle:
            # The value is the second space-separated field of the line.
            if line.startswith("Pred:"):
                pred_chunks.append(line.split(" ")[1].strip())
            if line.startswith("Conf:"):
                conf_chunks.append(line.split(" ")[1].strip())
    return ("".join(pred_chunks), "".join(conf_chunks))
def haplotype_call(bam_path, output_gvcf, ref_path, ploidy=2, only_cmd=False):
    """Build (and optionally run) a dockerised GATK HaplotypeCaller command.

    The BAM folder, the reference folder and, when it differs from the BAM
    folder, the output folder are bind-mounted into the
    ``broadinstitute/gatk:4.1.0.0`` container.

    :param bam_path: input BAM file
    :param output_gvcf: GVCF file to be written
    :param ref_path: reference fasta
    :param ploidy: value passed to ``-ploidy``
    :param only_cmd: when true, return the command instead of executing it
    :return: the command string if ``only_cmd`` is true, otherwise ``None``
    """
    # Bind mounts need absolute host paths; with a bare file name the folder
    # used to be "" and produced an invalid "-v :/out/bam/" option.
    bam_folder, bam_file = os.path.split(os.path.abspath(bam_path))
    ref_folder, ref_file = os.path.split(os.path.abspath(ref_path))
    out_folder, out_file = os.path.split(os.path.abspath(output_gvcf))

    docker_bam_folder = "/out/bam/"
    if bam_folder == out_folder:
        # Output lands next to the BAM: reuse that mount.
        mount2 = ""
        docker_out_path = docker_bam_folder
    else:
        docker_out_path = "/out/out/"
        mount2 = f" -v {out_folder}:{docker_out_path} "

    cmd = f"""docker run --rm -w /out {mount2} -v {bam_folder}:/out/bam/ -v {ref_folder}:/out/ref/ broadinstitute/gatk:4.1.0.0 \
    java -jar /gatk/gatk-package-4.1.0.0-local.jar HaplotypeCaller -ERC GVCF \
    -R /out/ref/{ref_file} -ploidy {ploidy} \
    -I /out/bam/{bam_file} --output-mode EMIT_ALL_SITES \
    -O {docker_out_path}/{out_file}"""

    if only_cmd:
        return cmd
    execute(cmd)
def profile_search(database, pssm_file, search_result, cpu=1, evalue=0.00001):
    """Search ``database`` with a PSSM using psiblast and yield one dict per HSP.

    psiblast writes BLAST XML (``-outfmt 5``) to ``search_result``, which is
    then parsed. If parsing raises ``ParseError`` a message is written to
    stderr and the generator ends without yielding.

    :param database: BLAST database to search
    :param pssm_file: PSSM given to ``-in_pssm``
    :param search_result: path of the XML result file
    :param cpu: number of threads
    :param evalue: e-value threshold
    :return: generator of dicts keyed by ``PsiProfile.search_result_fields``
    """
    execute(f"psiblast -db {database} -in_pssm {pssm_file} -num_threads {cpu} -evalue {evalue} -outfmt 5 -out {search_result} 1>&2")

    try:
        search_result = list(bpsio.parse(search_result, "blast-xml"))
    except ParseError:
        sys.stderr.write(
            f'PSIProfile: error parsing results from {search_result}')
        return None

    for query in search_result:
        for hit in list(query):
            for hsp in hit:
                row = (
                    hsp.query.id,
                    hsp.query_start,
                    hsp.query_end,
                    hsp.hit.id,
                    hsp.hit_start,
                    hsp.hit_end,
                    hsp.evalue,
                    # identity as a fraction of the alignment span
                    1.0 * hsp.ident_num / hsp.aln_span,
                    str(hsp.aln[0].seq),
                    str(hsp.aln[1].seq),
                )
                yield {
                    name: row[pos]
                    for pos, name in enumerate(PsiProfile.search_result_fields)
                }
def update_proteins(annotation_dir, proteome, seq_col_name, tax_id,
                    identity=0.9, cpus=multiprocessing.cpu_count(), db_init=None):
    # Transfers descriptions and dbxrefs to the stored proteins of
    # ``seq_col_name`` from blastp hits of ``proteome`` against a
    # species-level fasta.
    #
    # NOTE(review): ``out``, ``species_fasta``, ``species_tax`` and
    # ``tax_data`` are read below but, in the code visible here, are only
    # assigned inside the commented-out lines that follow -- confirm where
    # they are expected to come from.
    # if db_init:
    #     from SNDG.Sequence.ProteinAnnotator import PABase
    #     PABase.sqldb.initialize(db_init)
    # mkdir(annotation_dir)
    # out = annotation_dir + "/species_blast.tbl"
    #
    # tax = Tax.select().where(Tax.ncbi_taxon_id == tax_id).get()
    # species_tax = None
    # for tax in Tax.parents(tax):
    #     if tax.node_rank == "genus":
    #         species_tax = tax
    #         break
    # tax_data = "/data/xomeq/tax/"
    # species_fasta = tax_data + str(int(species_tax.ncbi_taxon_id)) + ".fasta"
    # The blast table is only (re)computed when it does not exist yet.
    if not os.path.exists(out):
        if not os.path.exists(species_fasta):
            Uniprot.download_proteome_from_tax(str(species_tax.ncbi_taxon_id), tax_data)

        # NOTE(review): -qcov_hsp_perc is given 0.9 here; check whether the
        # option expects a 0-100 percentage.
        cmd = "blastp -query %s -db %s -evalue 0.00001 -outfmt 6 -max_hsps 1 -qcov_hsp_perc 0.9 -num_threads %i -out %s"
        execute(cmd % (proteome, species_fasta, cpus, out))
    # Second "|"-separated field of each record id -> description without
    # its first word.
    species_desc = {
        x.id.split("|")[1]: " ".join(x.description.split()[1:])
        for x in bpio.parse(species_fasta, "fasta")
    }

    total = Protein.objects(organism=seq_col_name).count()
    with tqdm(bpsio.parse(out, "blast-tab"), total=total) as pbar:
        for query in pbar:
            pbar.set_description(query.id)
            # Only the first HSP of the first hit is considered.
            # NOTE(review): presumably ident_pct is on a 0-100 scale while
            # ``identity`` defaults to 0.9 -- verify the intended units.
            if query[0][0].ident_pct > identity:
                unip = query[0].id.split(
                    "|")[1] if "|" in query[0].id else query[0].id
                dbxrefs = [
                    x.db + "||" + x.value
                    for x in Mapping.select().where(Mapping.uniprot == unip)
                ]
                p = Protein.objects(gene=query.id,
                                    organism=seq_col_name).no_cache().get()

                # Existing descriptions are never overwritten.
                if not p.description and unip in species_desc:
                    p.description = species_desc[unip].split(
                        "OS=")[0] + " | homology with: " + unip
                    p.save()

                if dbxrefs:
                    p = SearchLoader.update_protein_with_dbxref(
                        query.id, dbxrefs, seq_col_name)
                    p.save()
def run_dssp(self):
    """Run ``dssp`` on ``self.pdb_path`` and append the parsed records to ``self.dssp``.

    One ``Struct`` (res, aa, ss, bp1, bp2, bbl1, bbl2, bslabel) is appended
    per residue line found after the ``#  RESIDUE AA`` header of the DSSP
    output. The temporary output file is removed afterwards.
    """
    import os  # local import: only needed to manage the temporary file

    fd, out = tempfile.mkstemp(suffix=".dssp")
    # mkstemp hands back an open descriptor; dssp writes by path, so close it
    # right away instead of leaking it.
    os.close(fd)
    try:
        execute("dssp -i {pdb_path} -o {out}", pdb_path=self.pdb_path, out=out)
        with open(out) as h:
            in_records = False
            for l in h:
                if not in_records:
                    # Compare whitespace-normalised text so the exact column
                    # spacing of the header line does not matter.
                    in_records = l.split()[:3] == ["#", "RESIDUE", "AA"]
                    continue
                if l[13:14] == "!":
                    # Chain-break records carry no residue number; int()
                    # below would fail on them.
                    continue
                self.dssp.append(
                    Struct(res=int(l[5:10]),
                           aa=l[10:14].strip(),
                           ss=l[14:17].strip(),
                           bp1=int(l[25:29]),
                           bp2=int(l[29:33]),
                           bbl1=l[23:24],
                           bbl2=l[24:25],
                           bslabel=l[33:34]))
    finally:
        if os.path.exists(out):
            os.remove(out)
def offtargets(proteome, dst_resutls, offtarget_db, cpus=multiprocessing.cpu_count(), min_identity=50):
    """Blast ``proteome`` against ``offtarget_db`` and keep hits above an identity cutoff.

    blastp emits tabular output (``-outfmt 6``) on stdout, ``awk`` keeps the
    rows whose third column (percent identity) is greater than
    ``min_identity`` and the filtered rows are written to ``dst_resutls``.

    :param proteome: query fasta
    :param dst_resutls: path of the filtered tabular result
    :param offtarget_db: BLAST protein database
    :param cpus: number of threads
    :param min_identity: identity threshold applied by awk
    :return: ``dst_resutls``
    """
    # blastp must write to stdout: with "-out <file>" nothing reached the
    # pipe, so the awk identity filter was never applied to the results.
    cmd = (f"blastp -evalue 1e-5 -max_hsps 1 -outfmt 6 -db {offtarget_db} -query {proteome} "
           f"-num_threads {cpus}|awk '$3>{min_identity}' > {dst_resutls}")
    execute(cmd)
    return dst_resutls
def profile_search(seq_id, database, pssm_file, search_result, cpu):
    """Run psiblast with an existing PSSM against ``database``.

    XML results (``-outfmt 5``) go to ``search_result``; the command's stdout
    is redirected to ``search_result + ".out"``. ``seq_id`` is not used.
    """
    template = "psiblast -db {database} -in_pssm {input} -num_threads {cpu} -evalue 0.001 -outfmt 5 -out {output} > {cmd_out} "
    substitutions = {
        "database": database,
        "input": pssm_file,
        "cpu": cpu,
        "output": search_result,
        "cmd_out": search_result + ".out",
    }
    execute(template, **substitutions)
def build_profile(seq_fasta, database, iterations, pssm_file, cpu, evalue=0.0001):
    """Build a PSSM for ``seq_fasta`` with iterated psiblast.

    :param seq_fasta: query sequence file
    :param database: BLAST database searched in every iteration
    :param iterations: value for ``-num_iterations``
    :param pssm_file: where psiblast writes the PSSM (``-out_pssm``)
    :param cpu: number of threads
    :param evalue: e-value threshold
    """
    psiblast_call = (
        f"psiblast -query {seq_fasta} -db {database} -num_threads {cpu} "
        f"-out_pssm {pssm_file} -evalue {evalue} -num_iterations {iterations} "
        "1>&2 2>/dev/null"
    )
    execute(psiblast_call)
def offtargets(proteome, dst_resutls, offtarget_db, cpus=multiprocessing.cpu_count(), min_identity=50):
    """Search ``proteome`` against ``offtarget_db`` with diamond blastp.

    The tabular output is piped through ``awk``, which keeps rows whose third
    column is greater than ``min_identity``; the result is redirected to
    ``dst_resutls``.

    :return: ``dst_resutls``
    """
    search = (
        "diamond blastp --evalue 1e-5 --max-hsps 1 --outfmt 6 --max-target-seqs 10000 "
        f"--db {offtarget_db} --query {proteome} --threads {cpus}"
    )
    identity_filter = f"awk '$3>{min_identity}' > {dst_resutls}"
    execute(search + "|" + identity_filter)
    return dst_resutls
def blast_para_anotar(self, data_dir, fasta_query, fasta_db):
    """Format ``fasta_db`` as a protein BLAST database and blast ``fasta_query`` against it.

    :param data_dir: prefix prepended to ``fasta_db`` for ``makeblastdb``
    :param fasta_query: query fasta
    :param fasta_db: database fasta
    :return: name of the XML result file (``fasta_db`` with ``.fasta``
        replaced by ``_blast.xml``)
    """
    # NOTE(review): the database is built at data_dir + fasta_db, while the
    # search and the result name use fasta_db without data_dir -- confirm.
    db_fasta = data_dir + fasta_db
    execute(f"makeblastdb -in {db_fasta} -dbtype prot")

    blast_result = fasta_db.replace(".fasta", "_blast.xml")
    execute(
        f"blastp -query {fasta_query} -db {fasta_db} -evalue 0.00001 -outfmt 5 "
        f"-max_hsps 1 -qcov_hsp_perc 0.8 -num_threads 3 -out {blast_result}"
    )
    return blast_result
def build_profile(seq_fasta, database, iterations, pssm_file, cpu):
    """Build a PSSM for ``seq_fasta`` with iterated psiblast (e-value 0.0001).

    The PSSM is written to ``pssm_file``; the command's stdout is redirected
    to ``pssm_file + ".out"``.
    """
    template = "psiblast -query {input} -db {database} -num_threads {cpu} -out_pssm {output} -evalue 0.0001 -num_iterations {iterations} > {cmd_out}"
    substitutions = {
        "input": seq_fasta,
        "database": database,
        "cpu": cpu,
        "output": pssm_file,
        "iterations": iterations,
        "cmd_out": pssm_file + ".out",
    }
    execute(template, **substitutions)
def download_file(complete_url, target, ovewrite=False, retries=3, timeout=20):
    """Download ``complete_url`` to ``target`` using ``wget``.

    :param complete_url: URL to fetch
    :param target: destination path; a blank value is replaced by ``"./"``
    :param ovewrite: allow replacing an existing ``target``
    :param retries: value for wget ``--tries``
    :param timeout: value for wget ``--timeout``
    :raises FileNotFoundError: if the directory of ``target`` does not exist
    :raises OvewriteFileException: if ``target`` exists and ``ovewrite`` is false
    """
    import shlex  # local import: only needed to quote the shell arguments

    if not target.strip():
        target = "./"
    if not os.path.exists(os.path.dirname(os.path.abspath(target))):
        raise FileNotFoundError("%s does not exists" % os.path.dirname(target))
    if os.path.exists(target) and not ovewrite:
        raise OvewriteFileException("%s already exists" % target)
    # Double quotes still let the shell expand "$", backticks and "\" inside
    # the URL or the path; shlex.quote passes both through literally.
    execute(
        f'wget --timeout={timeout} --tries={retries} '
        f'-O {shlex.quote(target)} {shlex.quote(complete_url)}'
    )
def fastqc(source_dir, dst_dir):
    """Run FastQC on every fastq file found directly in ``source_dir``.

    Files matching ``*.fastq``, ``*.fastq.gz``, ``*.fq`` and ``*.fq.gz`` are
    processed in sorted order; reports are written to ``dst_dir``, which is
    created first.
    """
    mkdir(dst_dir)

    reads = []
    for pattern in ("/*.fastq", "/*.fastq.gz", "/*.fq", "/*.fq.gz"):
        reads.extend(glob(source_dir + pattern))

    for filename in tqdm(sorted(reads)):
        execute("fastqc {src} -q --extract -o {dst}",
                src=filename, dst=dst_dir)
def update_pdb(self, pdb):
    """Make sure the uncompressed file for ``pdb`` is present locally.

    A gzipped copy that is already on disk is decompressed; otherwise, when
    the uncompressed file is missing too, the entry is downloaded and then
    decompressed.

    :param pdb: PDB code (lower-cased before use)
    """
    pdb = pdb.lower()
    subdir = pdb[1:3]
    mkdir(self.pdbs_dir + subdir)

    if os.path.exists(self.pdb_path_gzipped(pdb)):
        execute("gunzip " + self.pdb_path_gzipped(pdb))
        return
    if os.path.exists(self.pdb_path(pdb)):
        # Nothing to do: the structure is already unpacked.
        return

    remote = self.url_pdb_files + subdir + "/pdb" + pdb + self.pdb_download_extention
    local = self.pdbs_dir + subdir + "/pdb" + pdb + self.pdb_download_extention
    download_file(remote, local)
    execute("gunzip " + self.pdb_path_gzipped(pdb))
def download_deg(dst="/data/databases/deg/"):
    """Download the DEG 15.2 "p", "e" and "a" archives and format them for BLAST.

    Each zip is downloaded into ``dst``, extracted there, deleted, and the
    extracted ``degaa-<x>.dat`` file is indexed with ``makeblastdb``.

    :param dst: destination directory; it is concatenated directly with the
        file names, so it is expected to end with a path separator
    """
    for letter in ("p", "e", "a"):
        archive_name = f"deg-{letter}-15.2.zip"
        archive_path = dst + archive_name

        download_file("http://tubic.tju.edu.cn/deg/download/" + archive_name,
                      archive_path, ovewrite=True)
        execute(f"unzip -o {archive_path} -d {dst}")
        os.remove(archive_path)
        execute(f"makeblastdb -dbtype prot -in {dst}degaa-{letter}.dat")
def combineGVCFs(vcfs_folder, output_gvcf, ref_path):
    """Merge the VCFs of a folder with a dockerised GATK CombineGVCFs.

    GATK writes to ``<output_gvcf>.bk``; that file is then copied to
    ``output_gvcf`` with everything from ``.variant`` onwards removed from
    the sample names of the ``#CHROM`` header line.

    :param vcfs_folder: folder whose ``.vcf`` / ``.vcf.gz`` files are merged
    :param output_gvcf: gvcf to be created
    :param ref_path: fasta from the reference genome
    :return: None
    """
    # Docker bind mounts need absolute host paths. A relative output path
    # also used to give out_folder == "", so the intermediate file was read
    # back from "/<name>.bk".
    vcfs_folder = os.path.abspath(vcfs_folder)
    ref_folder, ref_file = os.path.split(os.path.abspath(ref_path))
    out_folder, out_name = os.path.split(os.path.abspath(output_gvcf))
    out_file = out_name + ".bk"

    vcfs_path = "/out/vcfs/"
    if vcfs_folder == out_folder:
        # Output lands next to the inputs: reuse that mount.
        mount2 = ""
        out_path = "/out/vcfs/"
    else:
        out_path = "/out/out/"
        mount2 = " -v {out_folder}:/out/out/ ".format(out_folder=out_folder)

    vcfs = " ".join(
        "--variant " + vcfs_path + x
        for x in os.listdir(vcfs_folder)
        if x.endswith((".vcf", ".vcf.gz"))
    )

    cmd = (
        f"docker run --rm -w /out {mount2} -v {vcfs_folder}:/out/vcfs/ "
        f"-v {ref_folder}:/out/ref/ broadinstitute/gatk:4.1.0.0 "
        f"java -jar /gatk/gatk-package-4.1.0.0-local.jar CombineGVCFs "
        f"-R /out/ref/{ref_file} {vcfs} "
        f"-O {out_path}/{out_file}"
    )
    print(cmd)
    execute(cmd)

    with open(os.path.join(out_folder, out_file)) as h, open(output_gvcf, "w") as hw:
        for l in h:
            if l.startswith("#CHROM"):
                # Drop the line ending before splitting: otherwise a last
                # sample without ".variant" keeps its "\n" and the one added
                # below produces an empty line.
                vec = l.rstrip("\r\n").split("\t")
                l = "\t".join(vec[:9] + [x.split(".variant")[0] for x in vec[9:]]) + "\n"
            hw.write(l)
def combineGVCFs(vcfs_folder, output_gvcf, ref_path, tmp="/tmp/combineGVCFs.vcf"):
    """Merge the VCFs of a folder with ``gatk CombineGVCFs``.

    GATK writes to ``tmp``; that file is then copied to ``output_gvcf`` with
    everything from ``.variant`` onwards removed from the sample names of the
    ``#CHROM`` header line.

    :param vcfs_folder: folder searched for ``.vcf``, ``.vcf.gz``, ``.gvcf``
        and ``.gvcf.gz`` files
    :param output_gvcf: path of the gvcf to be created, or an object with a
        ``write`` method; it is closed when the copy finishes
    :param ref_path: fasta from the reference genome
    :param tmp: intermediate file written by GATK
    :raises AssertionError: if ``ref_path``, ``vcfs_folder`` or the directory
        of ``output_gvcf`` does not exist
    :raises FileNotFoundError: if no input file is found in ``vcfs_folder``
    :return: None
    """
    assert os.path.exists(ref_path), f'{ref_path} does no exists'
    assert os.path.exists(vcfs_folder), f'{vcfs_folder} does no exists'
    vcfs_folder = os.path.abspath(vcfs_folder)
    if not hasattr(output_gvcf, "write"):
        assert os.path.exists(os.path.dirname(
            output_gvcf)), f'{os.path.dirname(output_gvcf)} does no exists'

    vcf_files = [
        x for x in glob(vcfs_folder + "/*vcf*")
        if x.endswith((".vcf", ".vcf.gz", ".gvcf", ".gvcf.gz"))
    ]
    if not vcf_files:
        raise FileNotFoundError(
            f'no .vcf or .vcf.gz files where found at {vcfs_folder}')

    vcfs = " ".join(f"--variant {x}" for x in vcf_files)
    execute(f"gatk CombineGVCFs -R {ref_path} {vcfs} -O {tmp}")

    with open(tmp) as h:
        if hasattr(output_gvcf, "write"):
            hw = output_gvcf
        else:
            hw = open(output_gvcf, "w")
        try:
            for l in h:
                if l.startswith("#CHROM"):
                    # Split without the line ending and add it back: cutting
                    # the last sample at ".variant" used to discard the "\n"
                    # too, gluing the header to the first record.
                    vec = l.rstrip("\r\n").split("\t")
                    l = "\t".join(
                        vec[:9] + [x.split(".variant")[0] for x in vec[9:]]) + "\n"
                hw.write(l)
        finally:
            hw.close()
def update_pdb(self, pdb):
    """Make sure the uncompressed file for ``pdb`` is present locally.

    When the uncompressed file is missing or smaller than 100 bytes, a cached
    gzipped copy larger than 100 bytes is decompressed. If the uncompressed
    file is still missing after that, any leftover archive is removed and the
    entry is downloaded and decompressed.

    :param pdb: PDB code (lower-cased before use)
    :return: path of the uncompressed file
    """
    pdb = pdb.lower()
    mkdir(self.pdbs_dir + pdb[1:3])

    pdb_file = self.pdb_path(pdb)
    gz_file = self.pdb_path_gzipped(pdb)

    if not os.path.exists(pdb_file) or (os.path.getsize(pdb_file) < 100):
        if os.path.exists(gz_file) and (os.path.getsize(gz_file) > 100):
            execute("gunzip " + gz_file)

        if not os.path.exists(pdb_file):
            # The cached archive was missing, too small or could not be
            # unpacked. It used to be deleted without fetching a new copy,
            # so the returned path could point to nothing.
            if os.path.exists(gz_file):
                os.remove(gz_file)
            download_file(self.url_pdb_files + pdb[1:3] + "/pdb" + pdb + self.pdb_download_extention,
                          self.pdbs_dir + pdb[1:3] + "/pdb" + pdb + self.pdb_download_extention,
                          ovewrite=True)
            execute("gunzip " + gz_file)
    return pdb_file
def test_residues_mapping(self):
    """The ``residues`` sub-command maps the variants of prot2.fasta onto 2VYI."""
    captured = tempfile.NamedTemporaryFile()
    execute(
        "python3 -m SNDG.Structure.StructureVariant residues -i ./test/prot2.fasta --pdb_data /tmp/data -s 2VYI",
        stdout=captured)
    with open(captured.name) as output:
        contents = output.read()
    captured.close()

    # Header row followed by the three expected records; the comparison is
    # whitespace-insensitive.
    expected = " ".join((
        "pdb chain resid alt ref pos pdb_pos",
        "2vyi A 81 P G 23 1",
        "2vyi A 160 K A 160 77",
        "2vyi B 160 K A 160 74",
    ))
    self.assertEqual(expected, " ".join(contents.split()))
def load_msa(self, input_sequence, pdb_code, pdb_chain=None):
    """Align a reference sequence against the sequence of a PDB entry with mafft.

    Sets ``self.ref_seq``, ``self.pdbfile``, ``self.msa`` and
    ``self.res_map``; the two sequences are written to ``self.seqs_path`` and
    the alignment to ``self.aln_path``.

    :param input_sequence: fasta file with the reference sequence
    :param pdb_code: PDB code (lower-cased before use)
    :param pdb_chain: chain passed to ``seq`` and ``residues_map``
    """
    pdb_code = pdb_code.lower()
    self.utils.update_pdb(pdb_code)
    self.ref_seq = bpio.read(input_sequence, "fasta")
    self.pdbfile = PDBFile(pdb_code, self.utils.pdb_path(pdb_code))

    with open(self.seqs_path, "w") as fasta_out:
        for record in (self.ref_seq, self.pdbfile.seq(selected_chain=pdb_chain)):
            bpio.write(record, fasta_out, "fasta")

    mafft_call = f'mafft --quiet --localpair --maxiterate 1000 {self.seqs_path} > {self.aln_path} '
    execute(docker_wrap_command(mafft_call))

    self.msa = MSAMap.from_msa(self.aln_path)
    self.res_map = self.pdbfile.residues_map(pdb_chain)
def hunt_pockets(self):
    """Run fpocket in docker on ``self.pdb_file_path`` and parse its output.

    When the PDB directory differs from ``self.work_directory``, any previous
    result at ``self.dest_path()`` is removed and the freshly produced output
    directory is moved there.

    :return: parsed ``FpocketOutput`` for ``self.dest_path()``
    """
    pdb_dir, pdb_file = os.path.split(os.path.abspath(self.pdb_file_path))
    self._execute(
        f"docker run -u $(id -u):$(id -g) -w /out -v '{pdb_dir}':/out --rm "
        f"ezequieljsosa/fpocket {self.fpocket_binary} -f '{pdb_file}'"
    )

    produced_elsewhere = (os.path.abspath(self._pdb_file_directory)
                          != os.path.abspath(self.work_directory))
    if produced_elsewhere:
        if os.path.exists(self.dest_path()):
            shutil.rmtree(self.dest_path(), True)
        work_dir = self._pdb_file_directory + "/" + self._out_directory()
        if os.path.exists(work_dir):
            execute(f'mv "{work_dir}" "{self.dest_path()}"')

    result = FpocketOutput(self.dest_path())
    result.parse()
    return result
def phylo(vcf, output):
    """Rewrite ``vcf`` into ``output``, handling spanning-deletion (``*``) alleles separately.

    Records whose ALT contains ``*`` are split with ``bcftools norm``, the
    ``*`` alleles are dropped, records with ``FORMAT/AD[*:1]>15`` are kept
    and their ``0/1:`` / ``0|1:`` text is rewritten by ``sed``. They are then
    appended (without header) to the records that never had a ``*`` allele,
    and the result is sorted into ``output``.

    :param vcf: input VCF
    :param output: sorted VCF to be written
    """
    import os
    import shlex
    import shutil
    import tempfile

    # Private scratch directory: the fixed /tmp/spaning_del.vcf and
    # /tmp/no_spanning.vcf used before were shared by concurrent runs and
    # never removed.
    workdir = tempfile.mkdtemp(prefix="phylo_")
    spanning = shlex.quote(os.path.join(workdir, "spaning_del.vcf"))
    no_spanning = shlex.quote(os.path.join(workdir, "no_spanning.vcf"))

    keep_star = "bcftools filter -i 'alt=\"*\"'"
    drop_star = "bcftools filter -e 'alt=\"*\"'"
    try:
        # NOTE(review): both sed replacements drop the ":" that follows the
        # genotype ("0/1:" -> "1/1") -- confirm that "1/1:" was not intended.
        # "\\|" keeps the literal backslash sed needs without relying on an
        # invalid Python escape sequence.
        execute(
            f"{keep_star} {vcf} | bcftools norm -m -any | "
            f"{drop_star} | bcftools filter -i 'FORMAT/AD[*:1]>15' | "
            f"sed 's|0/1:|1/1|' | sed 's|0\\|1:|1/1|' > {spanning}"
        )
        execute(f"{drop_star} {vcf} > {no_spanning}")
        execute(f"bcftools view {spanning} | grep -v '^#' >> {no_spanning}")
        execute(f"bcftools sort {no_spanning} > {output}")
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def annotate(self, fasta_path, output, training=None, locustag="PROKKA", gram=None, genus="", species="",
             strain="", kingdom="Bacteria", gcode=0, rfam=False, increment=5, prefix="ann", cpus=1, centre=""):
    """Annotate ``fasta_path`` with prokka, run through ``docker_wrap_command``.

    :param fasta_path: contigs fasta to annotate
    :param output: prokka ``--outdir``; its parent directory must exist
    :param training: optional protein file passed as ``--proteins``
    :param locustag: value for ``--locustag``; also used as strain name when
        ``strain`` is empty
    :param gram: optional value for ``--gram``
    :param genus: optional value for ``--genus``
    :param species: optional value for ``--species``
    :param strain: optional value for ``--strain``
    :param kingdom: optional value for ``--kingdom``
    :param gcode: value for ``--gcode``
    :param rfam: add ``--rfam`` when true
    :param increment: value for ``--increment``
    :param prefix: value for ``--prefix``
    :param cpus: value for ``--cpus``
    :param centre: optional value for ``--centre``
    :raises FileNotFoundError: if the parent of ``output``, ``fasta_path`` or
        ``training`` does not exist
    """
    output_parent = os.path.abspath(output + "/../")
    if not os.path.exists(output_parent):
        raise FileNotFoundError(
            f'{output_parent} not found, cant create output dir'
        )
    if not os.path.exists(fasta_path):
        raise FileNotFoundError(f'{os.path.abspath(fasta_path)} not found')
    if training and not os.path.exists(training):
        raise FileNotFoundError(f'{os.path.abspath(training)} not found')

    db = f"--proteins {training}" if training else ""
    rfam = "--rfam" if rfam else ""
    species = f"--species '{species}'" if species else ""
    # With no strain the bare locus tag used to be spliced into the command
    # as a stray positional argument; pass it as the --strain value instead.
    strain_name = strain if strain else locustag
    strain = f"--strain '{strain_name}'" if strain_name else ""
    kingdom = f"--kingdom '{kingdom}'" if kingdom else ""
    gram = f"--gram {gram}" if gram else ""
    centre = f"--centre '{centre}'" if centre else ""
    genus = f"--genus '{genus}'" if genus else ""

    cmd = (
        f"prokka --compliant --cpus {cpus} {gram} --addgenes {rfam} --locustag {locustag} --outdir {output} "
        f"--prefix {prefix} --force {db} {fasta_path} {kingdom} {strain} "
        f"--increment {increment} --gcode {gcode} {centre} {genus} {species}"
    )
    execute(docker_wrap_command(cmd))
def accpro(fasta, path, cmd="/opt/sspro4/bin/predict_acc.sh"):
    """Return the prediction line of the ``.acc6`` file belonging to ``fasta``.

    The ``.acc6`` file is named like ``fasta`` with ``.fasta`` replaced by
    ``.acc6``. When it does not exist yet, ``<cmd> <fasta> <acc6>`` is run
    with ``path`` as working directory.

    Example ``.acc6`` content (name, sequence, prediction)::

        1aqta_fastaalg
        STYHLDVVSAEQQMFSGLVEKIQVTGSEGELGIYPGHAPLLTAIKPGMIRIVKQHGHEEFIYLSGG...
        eebebbbbbbeeebbeeebeebbbebeebbbbbbbebbbbbbbbebbbbbbebeeeeebbbbbbbbb...

    :param fasta: input fasta file
    :param path: working directory for the predictor
    :param cmd: predictor executable
    :return: third line of the ``.acc6`` file, stripped
    """
    prediction_file = fasta.replace(".fasta", ".acc6")
    already_predicted = os.path.exists(prediction_file)
    if not already_predicted:
        execute(" ".join((cmd, fasta, prediction_file)), wd=path)

    with open(prediction_file) as handle:
        lines = handle.readlines()
    return lines[2].strip()
def test_residues_ann(self):
    """The ``ann`` sub-command emits four "###"-separated JSON annotations for test.tbl."""
    captured = tempfile.NamedTemporaryFile()
    execute(
        "cat test/test.tbl | python -m SNDG.Structure.StructureVariant ann --pdb_data /tmp/data ",
        stdout=captured)
    with open(captured.name) as output:
        contents = output.read()
    captured.close()

    anns = []
    for chunk in contents.split("###"):
        if chunk.strip():
            anns.append(json.loads(chunk))
    self.assertEqual(4, len(anns))

    def first_ann(residue):
        # Annotation of the first record reported for ``residue``.
        return [x["ann"] for x in anns if x["residue"] == residue][0]

    r2vyi_B_160_K = first_ann("2vyi_B_160_K")
    self.assertEqual(0.705, r2vyi_B_160_K["pockets"][0]["druggabilitty"])

    r1azm_A_91_P = first_ann("1azm_A_91_P")
    binding_details = [x["details"] for x in r1azm_A_91_P["binding"]]
    self.assertTrue("BINDING SITE FOR RESIDUE AZM A 262" in binding_details)
def download_proteome_from_tax(tax_id, dst_dir, format="fasta"):
    """Download the UniProt entries of a taxon, cluster them and build a BLAST db.

    The compressed download is stored as ``<dst_dir>/<tax_id>_all.fasta.gz``,
    decompressed, clustered with cd-hit (``-c 0.9``) into
    ``<dst_dir>/<tax_id>.fasta`` and indexed with ``makeblastdb``.

    :param tax_id: taxonomy id as a string (it is concatenated into paths)
    :param dst_dir: destination directory
    :param format: value of the ``format`` query parameter of the URL
    """
    durl = 'http://www.uniprot.org/uniprot/?sort=&desc=&compress=yes&query=taxonomy:{tax}&fil=&format={format}&force=yes'

    base = dst_dir + "/" + tax_id
    all_fasta = base + "_all.fasta"
    clustered_fasta = base + ".fasta"

    download_file(durl.format(tax=tax_id, format=format),
                  all_fasta + ".gz", ovewrite=True)
    execute("gunzip " + all_fasta + ".gz")
    execute("cd-hit -M 0 -c 0.9 -T 0 -i %s -o %s" % (all_fasta, clustered_fasta))
    execute("makeblastdb -dbtype prot -in " + clustered_fasta)
(time.time() - os.path.getatime(filepath)) / 60 / 60 / 24) > period)
# (The line above closes a definition that starts before this section; it is
# left untouched.)

# Proxy settings, currently disabled.
#os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080"
#os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"

# --- PDB: refresh the entries index and the local files, then build a BLAST
# database from the sequences written under /data/pdb/processed/.
mkdir("/data/pdb/")
download_file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx",
              "/data/pdb/entries.idx", ovewrite=True)
pdbs = PDBs("/data/pdb/")
pdbs.download_pdb_seq_ses()
pdbs.update_pdb_dir()
mkdir("/data/pdb/processed/")
pdbs.pdbs_seq_for_modelling()
execute("makeblastdb -dbtype prot -in /data/pdb/processed/seqs_from_pdb.fasta")

# --- UniRef90: download and unpack only when old_or_inexistent() says so.
if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta"):
    mkdir("/data/uniprot/uniref/uniref90")
    download_file(
        "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz",
        "/data/uniprot/uniref/uniref90/uniref90.fasta.gz",
        ovewrite=True)
    execute("gunzip /data/uniprot/uniref/uniref90/uniref90.fasta.gz")

# The ".pal" file is used as the marker for an existing BLAST database
# (presumably written by makeblastdb for this fasta -- confirm).
if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta.pal"):
    execute(
        "makeblastdb -dbtype prot -in /data/uniprot/uniref/uniref90/uniref90.fasta"
    )
def quast(glob_exp, out, ref=None):
    """Run quast on the assemblies selected by ``glob_exp``.

    :param glob_exp: text placed right after ``quast`` on the command line
    :param out: value for ``-o``
    :param ref: optional value for ``-R``; omitted when falsy
    """
    reference_option = (" -R " + ref) if ref else ""
    execute("quast " + glob_exp + " -o " + out + reference_option)
# Route HTTP and FTP traffic through the proxy below.
os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080"
os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"

# --- COG: each file is downloaded only when it is not on disk yet.
if not os.path.exists("/data/cog/whog"):
    mkdir("/data/cog/")
    download_file("ftp://ftp.ncbi.nih.gov/pub/COG/COG/whog", "/data/cog/whog")

if not os.path.exists("/data/cog/myva"):
    mkdir("/data/cog/")
    download_file("ftp://ftp.ncbi.nih.gov/pub/COG/COG/myva", "/data/cog/myva")
    execute("formatdb -i /data/cog/myva -o T")

# --- PRIAM (MAR15 release): download, unpack, list the *.chk profiles into
# the "priam" file and format it with formatrpsdb.
if not os.path.exists("/data/ec/PRIAM_MAR15/priam"):
    mkdir("/data/ec/")
    download_file("http://priam.prabi.fr/REL_MAR15/Distribution.zip", "/data/ec/PRIAM_MAR15.zip")
    # "exit 0" forces a zero exit status for the unzip step.
    # NOTE(review): retcodes presumably lists the exit codes execute_from
    # accepts -- confirm against its definition.
    execute_from("unzip /data/ec/PRIAM_MAR15.zip; exit 0;", "/data/ec/",retcodes=[0,1])
    execute_from("ls /data/ec/PRIAM_MAR15/PROFILES/*.chk > priam", "/data/ec/PRIAM_MAR15/")
    execute_from("formatrpsdb -i /data/ec/PRIAM_MAR15/priam -o T", "/data/ec/PRIAM_MAR15/")

# --- TIGRFAMs 15.0 HMM library.
# NOTE(review): the guard checks "tirgfam.hmm"; nothing visible here creates
# a file with that name -- confirm the spelling and where it is produced.
if not os.path.exists("/data/pfamtigrfam/tirgfam.hmm"):
    mkdir("/data/pfamtigrfam/INFO")
    download_file("ftp://ftp.jcvi.org/pub/data/TIGRFAMs/TIGRFAMs_15.0_HMM.LIB.gz",
                  "/data/pfamtigrfam/TIGRFAMs_15.0_HMM.LIB.gz")
    execute("gunzip /data/pfamtigrfam/TIGRFAMs_15.0_HMM.LIB.gz",retcodes=[0,2])