Esempio n. 1
0
 def run(self):
     """Fetch the NCBI gene ``All_Data.ags.gz`` file with rsync.

     The file is copied into the ``gene/`` directory below
     ``settings["DATADIR"]``; the command is executed through
     ``log_command`` under the 'gene' tag.
     """
     destination = os.path.join(settings["DATADIR"], 'gene/')
     command = " ".join((
         "/usr/bin/rsync",
         "rsync://ftp.ncbi.nlm.nih.gov/gene/DATA/ASN_BINARY/All_Data.ags.gz",
         destination,
     ))
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'gene', command, timeout=24 * 60 * 60)
Esempio n. 2
0
    def run(self):
        """Run ``MKDSSP`` on this entry's mmCIF file, if that file exists.

        Nothing is executed when the mmCIF input file is absent.
        """
        source = mmcif_path(self._pdbid)
        target = dssp_mmcif_path(self._pdbid)

        if not os.path.isfile(source):
            return

        arguments = (MKDSSP, source, target)
        log_command(_log, 'dssp-from-mmcif', "%s %s %s" % arguments)
Esempio n. 3
0
 def run(self):
     """Rsync the ``articles*.tar.gz`` files from NCBI PMC.

     Everything else is excluded; the destination is the ``pmc/``
     directory below ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'pmc/')
     pieces = [
         "/usr/bin/rsync -rtv --delete",
         "--include='articles*.tar.gz' --exclude='*'",
         "rsync://ftp.ncbi.nih.gov/pub/pmc/",
         destination,
     ]
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'pmc', " ".join(pieces), timeout=24 * 60 * 60)
Esempio n. 4
0
 def run(self):
     """Rsync the ``*.data.gz`` files of the EBI Unigene mirror.

     Sub-directories are included, all other files are excluded; the
     destination is the ``unigene/`` directory below
     ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'unigene/')
     filters = "--include='*/' --include='*.data.gz' --exclude='*'"
     remote = "rsync://ftp.ebi.ac.uk/pub/databases/Unigene/"
     command = " ".join(
         ["/usr/bin/rsync -rtv --delete", filters, remote, destination])
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'unigene', command, timeout=24 * 60 * 60)
Esempio n. 5
0
 def run(self):
     """Rsync the ``prosite*.dat`` and ``prosite*.doc`` files from EBI.

     All other files are excluded; the destination is the ``prosite/``
     directory below ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'prosite/')
     pieces = (
         "/usr/bin/rsync -rtv --delete --include='prosite*.dat'",
         "--include='prosite*.doc' --exclude='*'",
         "rsync://ftp.ebi.ac.uk/pub/databases/prosite/",
         destination,
     )
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'prosite', " ".join(pieces), timeout=24 * 60 * 60)
Esempio n. 6
0
 def run(self):
     """Rsync the ``*.seq.gz`` files from the NCBI genbank server.

     All other files are excluded; the destination is the ``genbank/``
     directory below ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'genbank/')
     filters = "--include='*.seq.gz' --exclude='*'"
     remote = "rsync://ftp.ncbi.nih.gov/genbank/"
     command = "/usr/bin/rsync -rtv --delete %s %s %s" % (
         filters, remote, destination)
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'genbank', command, timeout=24 * 60 * 60)
Esempio n. 7
0
 def run(self):
     """Rsync the EBI enzyme ``release_with_updates/`` location.

     The destination is the ``enzyme/`` directory below
     ``settings["DATADIR"]``; no extra rsync options are passed.
     """
     destination = os.path.join(settings["DATADIR"], 'enzyme/')
     remote = "rsync://ftp.ebi.ac.uk/pub/databases/enzyme/release_with_updates/"
     command = " ".join(("/usr/bin/rsync", remote, destination))
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'enzyme', command, timeout=24 * 60 * 60)
Esempio n. 8
0
    def run(self):
        """Run the pdbfinder2 script in ``-pdbftopdbf2`` mode for this entry.

        The entry id is passed to the script in lower case and the
        command is executed from the pdbfinder2 program directory.
        """
        _log.info("[pdbfinder2] mkpdbfinder2 %s" % self._pdbid)

        entry_id = self._pdbid.lower()
        command = "python2 " + PDBFINDER2_SCRIPT + " -pdbftopdbf2 " + entry_id
        log_command(_log, 'pdbfinder2', command,
                    cwd="/srv/data/prog/pdbfinder2/")
Esempio n. 9
0
 def run(self):
     """Rsync ``Pfam-A.full.gz`` and ``Pfam-A.seed.gz`` from EBI.

     All other files of the current Pfam release are excluded; the
     destination is the ``pfam/`` directory below ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'pfam/')
     pieces = [
         "/usr/bin/rsync -rtv --delete --include='Pfam-A.full.gz'",
         "--include='Pfam-A.seed.gz' --exclude='*'",
         "rsync://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/",
         destination,
     ]
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'pfam', " ".join(pieces), timeout=24 * 60 * 60)
Esempio n. 10
0
 def run(self):
     """Rsync the NCBI refseq ``release/complete/`` location.

     A single include pattern (``*g{p,b}ff.gz``) is passed and all
     other files are excluded; the destination is the ``refseq/``
     directory below ``settings["DATADIR"]``.
     """
     destination = os.path.join(settings["DATADIR"], 'refseq/')
     filters = "--include='*g{p,b}ff.gz' --exclude='*'"
     remote = "rsync://ftp.ncbi.nih.gov/refseq/release/complete/"
     command = " ".join(
         ("/usr/bin/rsync -rtv --delete", filters, remote, destination))
     # 24 * 60 * 60 -- presumably one day expressed in seconds.
     log_command(_log, 'refseq', command, timeout=24 * 60 * 60)
Esempio n. 11
0
    def run(self):
        """Run ``whatcheck`` on this entry and post-process its output files.

        Returns immediately when the flat PDB file is missing.  Otherwise
        the entry's report directory is created if needed and
        ``whatcheck`` is run there (timeout value ``5 * 60``).  When that
        command reports success:

        * ``htmlgen`` is run and a valid ``pdbout.html`` is renamed to
          ``index.html``;
        * ``check.db`` is compressed to ``check.db.bz2`` and removed;
        * ``pdb<id>_ION.OUT`` is renamed to ``<id>.ion``.

        When ``pdbout.txt`` is missing or the HTML does not validate, the
        run's ``log.log`` (if present) is handed to ``log_to_whynot``.
        """
        in_path = pdb_flat_path(self._pdbid)
        if not os.path.isfile(in_path):
            return

        out_dir = pdbreport_path(self._pdbid)
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        log_path = os.path.join(out_dir, "log.log")

        txt_path = os.path.join(out_dir, "pdbout.txt")
        check_path = os.path.join(out_dir, "check.db")
        checkbz2_path = os.path.join(out_dir, "check.db.bz2")
        html_path = os.path.join(out_dir, "pdbout.html")
        index_path = os.path.join(out_dir, "index.html")
        ion_path = os.path.join(out_dir, "%s.ion" % self._pdbid)
        ionout_path = os.path.join(out_dir, "pdb%s_ION.OUT" % self._pdbid)

        # %Y is the calendar year.  The ISO-8601 week-based year (%G) can
        # differ from it in the days around New Year, which would yield a
        # date stamp whose year does not belong to the month and day.
        whynot_path = ("/srv/data/scratch/whynot2/comment/%sWC.txt" %
                       (datetime.now().strftime("%Y%m%d")))

        if not log_command(_log,
                           'pdbreport',
                           ". %s; %s %s" % (ccp4setup, whatcheck, in_path),
                           cwd=out_dir,
                           timeout=5 * 60):
            _log.error("[pdbreport] whatcheck timeout for %s" % self._pdbid)
            return

        if not os.path.isfile(txt_path):
            # No text report was produced: record the log, if there is one.
            _log.debug("validation failed")
            if os.path.isfile(log_path):
                log_to_whynot(log_path, self._pdbid, whynot_path)
            return

        log_command(_log, 'pdbreport', htmlgen, cwd=out_dir)
        if os.path.isfile(html_path) and valid_html(html_path):
            os.rename(html_path, index_path)
        else:
            _log.debug("validation failed")
            if os.path.isfile(log_path):
                log_to_whynot(log_path, self._pdbid, whynot_path)

        if os.path.isfile(check_path):
            # Keep only the bzip2-compressed copy of check.db.
            with BZ2File(checkbz2_path, 'wb') as g:
                with open(check_path, 'rb') as f:
                    for chunk in iter(lambda: f.read(1024), b''):
                        g.write(chunk)
            os.remove(check_path)

        if os.path.isfile(ionout_path):
            os.rename(ionout_path, ion_path)
Esempio n. 12
0
    def run(self):
        """Rsync the PDB-REDO databank into ``DATADIR/pdb_redo/``.

        One ``--exclude`` option is generated for every id in
        ``settings["FRAUD_IDS"]`` and passed to rsync.  Previously the
        options were built but never added to the command line, so the
        listed entries were synchronised like any other.
        """
        excludes = ''.join(
            " --exclude='%s'" % pdbid for pdbid in settings["FRAUD_IDS"])

        log_command(_log, 'pdbredo',
            "/usr/bin/rsync -av --delete" + excludes +
            " rsync://rsync.pdb-redo.eu/pdb-redo/" +
            " %s" % os.path.join(settings["DATADIR"], 'pdb_redo/'),
            # 24 * 60 * 60 -- presumably one day expressed in seconds.
            timeout=24*60*60
        )
Esempio n. 13
0
    def run(self):
        """Rsync the wwPDB structure-factor files into ``DATADIR``.

        Files matching ``r????sf.ent.gz`` are included and everything
        else is excluded, except that the structure-factor file of every
        id in ``settings["FRAUD_IDS"]`` is excluded as well.
        """
        excludes = ''.join(
            " --exclude='r%ssf.ent.gz'" % pdbid
            for pdbid in settings["FRAUD_IDS"])

        # rsync acts on the first filter rule that matches a file, so the
        # per-entry excludes have to precede the generic include pattern;
        # placed after it they would never be consulted.
        log_command(_log, 'structure_factors',
            "/usr/bin/rsync -rtv --delete --port=33444" + excludes +
            " --include='r????sf.ent.gz' --exclude='*'" +
            " rsync.wwpdb.org::ftp_data/structures/divided/structure_factors/**/" +
            " %s" % os.path.join(settings["DATADIR"], 'structure_factors/'),
            # 24 * 60 * 60 -- presumably one day expressed in seconds.
            timeout=24*60*60
        )
Esempio n. 14
0
    def run(self):
        """Build the hssp3 file for this entry and convert it with HSSPCONV.

        ``run_mkhssp`` is only called when the input file exists.  The
        conversion, and the removal of a leftover ``.err`` file, only
        happen when the hssp3 output file is present afterwards.
        """
        source = find_input(self._pdbid)
        hssp3_file = hssp3_path(self._pdbid)
        hssp1_file = hssp1_path(self._pdbid)
        error_file = "/srv/data/scratch/whynot2/hssp/%s.err" % self._pdbid

        if os.path.isfile(source):
            run_mkhssp(source, hssp3_file, "hssp", error_file)

        if not os.path.isfile(hssp3_file):
            return

        conversion = "%s %s %s" % (HSSPCONV, hssp3_file, hssp1_file)
        log_command(_log, 'hssp', conversion)
        if os.path.isfile(error_file):
            os.remove(error_file)
Esempio n. 15
0
    def run(self):
        """Run the PDB-REDO script for this entry and link its final model.

        Steps, in order:

        1. When the PDB file is missing, a whynot comment is appended to
           the dated ``*_pdbredo.txt`` file; when only the flat file is
           missing, ``PdbExtractJob`` is run to produce it.
        2. ``redo_script`` is run for the entry; a falsy result from
           ``log_command`` is logged as a timeout and recorded in
           ``whynot.txt``.
        3. When ``<id>_final_tot.pdb`` exists, a symlink to it is created
           at ``DATADIR/pdb_redo/flat/<id>`` unless that path is already
           taken.
        """
        # NOTE(review): the redo script is still run after the "no PDB
        # file" comment has been written -- confirm this is intended.
        if not os.path.isfile(pdb_path(self._pdbid)):
            whynot_path = datetime.datetime.now().strftime(
                    '/srv/data/scratch/whynot2/comment/%Y%m%d_pdbredo.txt')
            with open(whynot_path, 'a') as f:
                f.write('COMMENT: No PDB-format coordinate file available\n')
                f.write('PDB_REDO, %s\n' % self._pdbid)
        elif not os.path.isfile(pdb_flat_path(self._pdbid)):
            PdbExtractJob(pdb_path(self._pdbid)).run()

        _log.info("[pdbredo] running pdbredo for %s" % self._pdbid)
        # 3 * 24 * 60 * 60 -- presumably three days expressed in seconds.
        timeout = 3 * 24 * 60 * 60
        if not log_command(_log, 'pdbredo',
                           "%s %s" % (redo_script, self._pdbid),
                           timeout=timeout):
            _log.error("[pdbredo] pdbredo timeout for %s" % self._pdbid)

            whynot_path = os.path.join(zata_dir, 'whynot.txt')
            with open(whynot_path, 'a') as f:
                # The file is opened in append mode: end the record with a
                # newline so the next record does not continue this line.
                f.write(
                    'COMMENT: PDB REDO script timed out\nPDB_REDO, %s\n'
                    % self._pdbid
                )

        tot_path = os.path.join(pdbredo_path(self._pdbid),
                                "%s_final_tot.pdb" % self._pdbid)
        if os.path.isfile(tot_path):
            link_path = os.path.join(settings["DATADIR"],
                                     "pdb_redo/flat/%s" % self._pdbid)
            # Create the link only when nothing occupies link_path yet;
            # os.symlink raises if the destination already exists.
            if not os.path.lexists(link_path):
                os.symlink(tot_path, link_path)
Esempio n. 16
0
    def run(self):
        """Run the scene ``script`` for this entry's WHAT IF list file.

        The structure file is looked up according to the source type
        ('pdb' or 'redo', case-insensitive); any other source type
        raises ``Exception``.  Nothing is run when the list data file
        does not exist.
        """
        locate_structure = {
            'pdb': pdb_path,
            'redo': final_path,
        }.get(self._src.lower())
        if locate_structure is None:
            raise Exception("unknown structure type %s" % self._src)
        struct_path = locate_structure(self._pdbid)

        in_path = wilist_data_path(self._src, self._lis_type, self._pdbid)
        root_dir = os.path.join(settings["DATADIR"], "wi-lists/%s" % self._src)
        if not os.path.isfile(in_path):
            return

        # Set in this process's environment before the command is run.
        os.environ["SCENES_SETTINGS"] = scene_settings
        arguments = (script, os.getpid(), struct_path, self._pdbid,
                     self._src.upper(), commands[self._lis_type], in_path)
        log_command(_log, 'scene', "%s %d %s %s %s %s %s" % arguments,
                    cwd=root_dir)
Esempio n. 17
0
    def run(self):
        """Run ``mergepdbfinder2.pl`` and gzip ``PDBFIND2.TXT``.

        The merge script receives the output file path and the ``data/``
        directory as arguments.  Afterwards ``PDBFIND2.TXT`` is copied
        into ``PDBFIND2.TXT.gz`` in 1024-byte chunks.
        """
        data_dir = os.path.join(pdbfinder2_dir, "data/")
        merged_path = os.path.join(pdbfinder2_dir, "PDBFIND2.TXT")
        compressed_path = os.path.join(pdbfinder2_dir, "PDBFIND2.TXT.gz")

        merge_command = ("/srv/data/prog/pdbfinder2/mergepdbfinder2.pl %s %s"
                         % (merged_path, data_dir))
        log_command(_log, 'pdbfinder2', merge_command,
                    cwd="/srv/data/prog/pdbfinder2/")

        _log.debug("[pdbfinder2] compressing %s" % merged_path)
        with GzipFile(compressed_path, 'wb') as archive:
            with open(merged_path, 'rb') as plain:
                # Stop at the first empty read, i.e. end of file.
                for block in iter(lambda: plain.read(1024), b''):
                    archive.write(block)
Esempio n. 18
0
    def run(self):
        """Download the UniProt knowledgebase files and unpack the fastas.

        Six files of the current UniProt release are fetched with wget
        into ``DATADIR/uniprot/``.  The two ``*.fasta.gz`` files are then
        decompressed into ``DATADIR/fasta/``.  Both directories are
        created when they do not exist yet.
        """
        uniprot_dir = os.path.join(settings["DATADIR"], 'uniprot/')
        fasta_dir = os.path.join(settings["DATADIR"], 'fasta/')

        # Both directories are written to below; previously only
        # uniprot_dir was created, so extraction failed on a fresh setup.
        for directory in (uniprot_dir, fasta_dir):
            if not os.path.isdir(directory):
                os.mkdir(directory)

        base_url = ('ftp://ftp.uniprot.org/pub/databases/uniprot/'
                    'current_release/knowledgebase/complete/')
        for filename in ['uniprot_sprot.fasta.gz', 'uniprot_trembl.fasta.gz',
                         'uniprot_sprot.dat.gz', 'uniprot_trembl.dat.gz',
                         'README', 'reldate.txt']:
            log_command(_log, 'uniprot',
                '/usr/bin/wget -q %s%s -N -P %s' % (
                    base_url, filename, uniprot_dir),
                # 24 * 60 * 60 -- presumably one day expressed in seconds.
                timeout=24*60*60
            )

        # Decompress fastas:
        for filename in ['uniprot_sprot.fasta.gz', 'uniprot_trembl.fasta.gz']:
            # Dropping the last extension turns "x.fasta.gz" into "x.fasta".
            fastaname = os.path.splitext(filename)[0]
            fasta_path = os.path.join(fasta_dir, fastaname)
            _log.info("[uniprot] extract to %s" % fasta_path)
            with open(fasta_path, 'wb') as f:
                with GzipFile(os.path.join(uniprot_dir, filename), 'rb') as g:
                    # Stop at the first empty read, i.e. end of stream.
                    for chunk in iter(lambda: g.read(65536), b''):
                        f.write(chunk)
Esempio n. 19
0
    def run(self):
        """Generate the bzip2-compressed hydrogen-bond list for this entry.

        Returns immediately when the flat PDB file is missing.  Otherwise
        ``whatif`` is run in a temporary directory with a command script
        on its input; the resulting ``<id>.hb2.log`` is copied, minus its
        first two lines and with every "->" replaced by "-", into
        ``<id>.hb2.bz2`` in the entry's hbonds directory.  The temporary
        directory is always removed afterwards.
        """
        in_path = pdb_flat_path(self._pdbid)
        if not os.path.isfile(in_path):
            return

        out_dir = hbonds_path(self._pdbid)
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        _log.info("making hbonds for %s" % self._pdbid)

        logfilename = "%s.hb2.log" % self._pdbid
        script = """
                 GETMOL %s Y %s
                 HBONDS
                 HB2INI

                 DOLOG %s 0
                 HBONDS
                 HB2LIS protein 0 protein 0
                 NOLOG
                 FULLST Y
                 """ % (in_path, self._pdbid, logfilename)

        # Created outside the try block: if mkdtemp itself fails there is
        # nothing to clean up, and the finally clause must not touch an
        # unbound name (which would mask the original error).
        tmpdir = tempfile.mkdtemp()
        try:
            if log_command(_log, 'hbonds', whatif,
                           cwd=tmpdir, timeout=20, strin=script):

                logfilepath = os.path.join(tmpdir, logfilename)
                out_path = os.path.join(out_dir, "%s.hb2.bz2" % self._pdbid)

                if os.path.isfile(logfilepath):
                    with open(logfilepath, 'rb') as g:
                        with BZ2File(out_path, 'wb') as f:
                            # The first two lines of the log are skipped.
                            for line in g.readlines()[2:]:
                                f.write(line.decode('ascii').replace("->", "-").encode('ascii'))
                else:
                    _log.error("hb2 log not generated for %s" % self._pdbid)
            else:
                _log.error("[hbonds] whatif timeout for %s" % self._pdbid)
        finally:
            # Remove all whatif runtime files
            if os.path.isdir(tmpdir):
                shutil.rmtree(tmpdir)
Esempio n. 20
0
 def run(self):
     """Execute the ``alldata.csh`` script via ``log_command``."""
     script_path = "/srv/data/pdb_redo/alldata.csh"
     log_command(_log, "pdbredo", script_path)
Esempio n. 21
0
 def run(self):
     """Source the CCP4 setup file, then run the bdb ``script``.

     The script receives the bdb directory, the entry's flat PDB file
     path and the entry id as arguments.
     """
     arguments = (ccp4_setup, script, bdb_dir,
                  pdb_flat_path(self._pdbid), self._pdbid)
     command = ". %s; %s %s %s %s" % arguments
     log_command(_log, 'bdb', command)
Esempio n. 22
0
 def run(self):
     """Run ``CRAWL`` with this job's databank and path as arguments."""
     arguments = (CRAWL, self._databank, self._path)
     log_command(_log, 'whynot', "%s %s %s" % arguments)
Esempio n. 23
0
    def run(self):
        """Run ``MKDSSP`` on this entry's PDB file, if that file exists.

        Nothing is executed when the PDB input file is absent.
        """
        source = pdb_path(self._pdbid)
        target = dssp_path(self._pdbid)

        if not os.path.isfile(source):
            return

        arguments = (MKDSSP, source, target)
        log_command(_log, 'dssp', "%s %s %s" % arguments)