Example #1
0
def is_valid_gzip(fn, lazy=False, use_pigz=False):
    '''
    Return True if fn looks like an intact gzip file, False otherwise.

    By default the whole file is verified with "gzip -t" (or "pigz -t"),
    which has to iterate through the entire file and is therefore slow.

    lazy only tries to decompress the first 10 lines. It is much faster,
    but it isn't very safe: corruption further into the file goes unnoticed.

    use_pigz uses pigz instead of gzip for the full check. A bad idea if a
    number of processes have already been spawned.

    Raises OSError if the gzip/pigz executable itself cannot be started, so
    that a missing tool is not mistaken for a corrupted file.
    '''
    if lazy:
        # Decompress in-process. The former "gzip -dc fn | head" pipeline
        # reported the exit status of head, so it succeeded for any input.
        import gzip
        import zlib
        from itertools import islice
        try:
            with open(fn, "rb") as raw:
                # An empty file would otherwise decompress "successfully".
                if raw.read(2) != b"\x1f\x8b":
                    return False
            with gzip.open(fn, "rb") as handle:
                for _ in islice(handle, 10):
                    pass
        except (OSError, EOFError, zlib.error):
            return False
        return True
    # lazy has already returned. This is the "else".
    prog = "pigz" if use_pigz else "gzip"
    try:
        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are passed through verbatim.
        cc([prog, "-t", fn])
    except CalledProcessError:
        sys.stderr.write("Corrupted file " + fn + ". Delete, try again.\n")
        return False
    sys.stderr.write(fn + " is valid\n")
    return True
Example #2
0
def get_gi2tax(folder=FOLDER):
    """Return the path of gi_taxid_nucl.dmp inside folder, fetching it if absent.

    folder is created when it does not exist. When the dump is not already
    present, GI2TAX_MAP_PATH is downloaded with curl and decompressed with
    gzip straight into the target file.
    """
    folder_missing = not os.path.isdir(folder)
    if folder_missing:
        os.makedirs(folder)
    target = folder + "/gi_taxid_nucl.dmp"
    if os.path.isfile(target):
        # Already downloaded on a previous run.
        return target
    download_cmd = "curl %s | gzip -dc > %s" % (GI2TAX_MAP_PATH, target)
    cc(download_cmd, shell=True)
    return target
Example #3
0
def append_old_to_new(gi2tax_map, newmap, concat_map, folder=FOLDER):
    """Write a name-to-taxid map for the *.fna files in folder, then append newmap.

    For every *.fna file found under folder, the first line (as returned by
    xfirstline) is split on "|"; field 1 is used as the integer key into
    gi2tax_map and field 3 as the sequence name. Each hit is written to
    concat_map as "name<TAB>taxid". Keys that are not in gi2tax_map are
    collected and printed. Finally the contents of newmap are appended to
    concat_map.

    Returns a tuple (concat_map, found) where found is the set of paths
    whose key was present in gi2tax_map.
    """
    # Argument list (no shell) so that a folder name with spaces or shell
    # metacharacters reaches find unchanged.
    paths = co(["find", folder, "-name", "*.fna"])
    if isinstance(paths, bytes):
        paths = paths.decode()
    # find prints one path per line; splitting on lines (not on arbitrary
    # whitespace) keeps paths that contain spaces intact.
    paths = paths.splitlines()
    print("Length of accepted things: %i" % len(gi2tax_map))
    found, missing = set(), set()
    # The with-block guarantees the handle is closed (and flushed) even if
    # a malformed header raises, and before cat appends to the same file.
    with open(concat_map, "w") as ofh:
        ofw = ofh.write
        for path in paths:
            sys.stderr.write("Processing path %s\n" % path)
            ptoks = xfirstline(path).split("|")
            name = ptoks[3]
            key = int(ptoks[1])
            try:
                val = gi2tax_map[key]
            except KeyError:
                missing.add(key)
                continue
            ofw("%s\t%i\n" % (name, val))
            found.add(path)
    print("Missing: " + str(missing))
    # The shell is needed here for the ">>" redirection.
    cc("cat %s >> %s" % (newmap, concat_map), shell=True)
    return concat_map, found
Example #4
0
def fetch_genomes(folder, names=NAMES):
    """Mirror the .fna/.fna.gz files of every named archive into folder.

    Each name is fetched from ARCHIVE_BASE/<name> into folder/<name> with
    wget, and fetch_i100 is called on folder afterwards.
    """
    wget_template = ("wget -N -m -np -nd -e robots=off -P"
                     " %s/%s -A .fna,.fna.gz %s")
    for name in names:
        remote = ARCHIVE_BASE + "/" + name
        cc(wget_template % (folder, name, remote), shell=True)
    fetch_i100(folder)
Example #5
0
File: init.py Project: atg/empty
def main():
    """Bootstrap the project: fetch vendored client scripts and add yarn packages.

    Changes the working directory to the directory containing this file,
    downloads the browser libraries through get_url, then runs "yarn add"
    once in client/ and once in server/.
    """
    os.chdir(Path(__file__).resolve().parent)

    # ==============
    # === Client ===
    # ==============
    # (destination, source URL) pairs, fetched in this order.
    # Unpinned alternative for mithril: https://unpkg.com/mithril/mithril.min.js
    vendor_assets = [
        ('client/static/vendor/mithril.min.js',
         'https://cdnjs.cloudflare.com/ajax/libs/mithril/2.0.0-rc.4/mithril.min.js'),
        ('client/static/vendor/lodash.min.js',
         'https://unpkg.com/lodash/lodash.min.js'),
        ('client/static/vendor/msgpack-lite.min.js',
         'https://cdnjs.cloudflare.com/ajax/libs/msgpack-lite/0.1.26/msgpack.min.js'),
        ('client/static/vendor/system.min.js',
         'https://raw.githubusercontent.com/systemjs/systemjs/master/dist/system.min.js'),
    ]
    for destination, url in vendor_assets:
        get_url(destination, url)

    client_packages = [
        '@types/lodash',
        '@types/mithril',
        '@types/msgpack-lite',
    ]
    cc(['yarn', 'add'] + client_packages, cwd="client")

    # ==============
    # === Server ===
    # ==============
    server_packages = [
        '@types/node',
        'lodash',
        '@types/lodash',
        # Hyperscript for HTML templating
        'hyperscript',
        '@types/hyperscript',
        # Express-related modules
        'express',
        '@types/express',
        'body-parser',
        '@types/body-parser',
        'cookie-parser',
        '@types/cookie-parser',
        'express-session',
        '@types/express-session',
        'memorystore',  # no '@types/memorystore' entry is installed
        'csurf',
        '@types/csurf',
        # I always end up using websockets for something
        'ws',
        '@types/ws',
        'msgpack-lite',
        '@types/msgpack-lite',
    ]
    cc(['yarn', 'add'] + server_packages, cwd="server")
Example #6
0
def get_acceptable_taxids(taxmap, path=PATH):
    """Download the summary at path and return the set of acceptable taxids.

    The summary is saved as tax_summary.txt in the current working
    directory (and left there afterwards). For every data line, the fourth
    whitespace-separated field is read as an integer taxid; unless it is
    already in the result, the set returned by fill_set_from_tax(tax,
    taxmap) is merged into the result.

    Lines whose first field is "Accession" (the header) and blank lines are
    skipped.
    """
    ret = set()
    cc("curl %s > tax_summary.txt" % path, shell=True)
    # The with-block closes the handle instead of leaving it to the GC.
    with open("tax_summary.txt") as summary:
        for line in summary:
            toks = line.split()
            # A blank line yields no tokens; indexing it would raise.
            if not toks or toks[0] == "Accession":
                continue
            tax = int(toks[3])
            if tax in ret:
                continue
            ret |= fill_set_from_tax(tax, taxmap)
            print("Size of ret: %i" % len(ret))
    print("Acceptable:", ret)
    return ret
Example #7
0
def make_output(paths, outpath):
    """Concatenate paths into outpath and delete the inputs if the result is valid.

    When outpath is falsy, a name is derived from the first input: same
    directory, with the third "_"-separated token of the file name replaced
    by "LALL" and the last token by "ALL.fastq.gz". If there are no inputs
    either, a message is printed and None is returned.

    The inputs are joined with "cat"; they are removed only when
    is_valid_gzip accepts the combined file. Returns outpath.
    """
    if not outpath:
        if not paths:
            print("Missing input in folder")
            return None
        # Split the first input into its directory and file name.
        directory, _, basename = paths[0].rpartition("/")
        name_parts = basename.split("_")
        name_parts[2] = "LALL"
        name_parts[-1] = "ALL.fastq.gz"
        outpath = directory + "/" + "_".join(name_parts)
    cat_cmd = "cat %s > %s" % (" ".join(paths), outpath)
    cc(cat_cmd, shell=True)
    if is_valid_gzip(outpath):
        for path in paths:
            cc("rm -f " + path, shell=True)
    return outpath
Example #8
0
def retry_cc(tup):
    """Run a shell command, retrying up to 10 times while it fails.

    tup is a (cstr, die) pair: cstr is the shell command string and die
    decides what happens once every attempt has failed -- raise an
    Exception when true, otherwise only report the failure on stderr.
    Taking a single tuple lets the function be used with Pool.map.

    Returns None, as soon as one attempt succeeds or after the failure has
    been reported.
    """
    cstr, die = tup
    RETRY_LIMIT = 10
    for attempt in range(RETRY_LIMIT):
        print(cstr, file=sys.stderr)
        try:
            cc(cstr, shell=True)
        except CalledProcessError:
            print("retry number", attempt, file=sys.stderr)
        else:
            return
    failure = (cstr, RETRY_LIMIT)
    if die:
        raise Exception("Could not download via %s "
                        "even after %i attempts." % failure)
    sys.stderr.write("Could not download %s even after %i attempts" % failure)
Example #9
0
def main():
    """Build the combined name-to-taxid map and record which genome paths matched.

    Steps: make sure the gi-to-taxid dump exists, download genomes unless
    args.no_download is set, compute the acceptable taxids, write the
    combined map, de-duplicate it with sort | uniq, report its line count
    on stderr, and write the matched paths to args.found (or
    "found_paths.txt"). Returns 0.
    """
    args = getopts()
    gi2tax = get_gi2tax(args.folder)
    if args.no_download is False:
        fetch_genomes(args.folder)

    print("Getting acceptable taxids")
    taxmap = build_full_taxmap(args.taxonomy)
    acceptable_taxids = get_acceptable_taxids(taxmap)

    print("Appending old to new")
    accepted_map = parse_gi2tax(gi2tax, acceptable_taxids)
    concat, found = append_old_to_new(accepted_map,
                                      args.new_refseq_nameid_map,
                                      args.combined_nameid_map,
                                      args.folder)

    # De-duplicate in place through a temporary file in the working directory.
    dedup_cmd = "sort {0} | uniq > tmp.zomg && mv tmp.zomg {0}".format(concat)
    cc(dedup_cmd, shell=True)

    wc_output = co("wc -l %s" % concat, shell=True).decode()
    nl = int(wc_output.split()[0])
    sys.stderr.write("Concatenated file of total lines "
                     "%i is written to %s.\n" % (nl, concat))

    found_path = args.found or "found_paths.txt"
    with open(found_path, "w") as f:
        f.writelines(path + "\n" for path in found)
    return 0
Example #10
0
def main():
    """Download the assemblies of the requested clades and write a name/id map.

    For each clade the assembly summary is fetched (unless already on
    disk) and parsed, existing downloads are checked with check_path (or
    check_path_lazy when args.lazy is set), missing files are downloaded
    in parallel with curl through retry_cc, and the entries of the clade's
    id map are re-keyed by the first token of each file's first line
    (without its leading character). The merged map is written to
    <ref>/<args.idmap> as "key<TAB>value" lines.

    When the first command-line argument is "nodes" and <ref>/nodes.dmp is
    missing, only the taxonomy dump is fetched and 0 is returned.

    Returns ExitCodes.EXIT_SUCCESS; exits with ExitCodes.EXIT_FAILURE when
    an unknown clade is requested.
    """
    tax_path = TAX_PATH
    args = getopts()
    ref = args.ref if args.ref else "ref"
    # Create the reference directory up front: the "nodes" shortcut below
    # writes into it as well, and curl -o fails if it does not exist.
    if not os.path.isdir(ref):
        os.makedirs(ref)
    nodes_path = "%s/nodes.dmp" % ref
    taxdump_cstr = ("curl {tax_path} -o {ref}/"
                    "taxdump.tgz && tar -zxvf {ref}/taxdump.tgz"
                    " && mv nodes.dmp {ref}/nodes.dmp").format(
                        tax_path=tax_path, ref=ref)
    if argv[1:] and argv[1] == "nodes":
        # NOTE(review): when nodes.dmp already exists this falls through to
        # the full download below -- confirm that is intended.
        if not os.path.isfile(nodes_path):
            cc(taxdump_cstr, shell=True)
            return 0
    clades = args.clades if args.clades else DEFAULT_CLADES
    for clade in clades:
        # Explicit test rather than assert: asserts are stripped under -O,
        # which would let an invalid clade through silently.
        if clade not in ALL_CLADES_MAP and clade not in ["all", "default"]:
            print("Clade %s not 'all', 'default', or one of the valid "
                  "clades: %s" % (clade, ALL_CLADES_STR), file=sys.stderr)
            sys.exit(ExitCodes.EXIT_FAILURE)
    to_dl = get_clade_map(clades)
    print("About to download clades %s" % ", ".join(to_dl), file=sys.stderr)
    nameidmap = {}
    for clade in to_dl:
        cladeidmap = {}
        clade_dir = ref + "/" + clade
        if not os.path.isdir(clade_dir):
            os.makedirs(clade_dir)
        summary_path = "%s/%s/as.%s.txt" % (ref, clade, clade)
        if not os.path.isfile(summary_path):
            cstr = ("curl %s/assembly_summary.txt "
                    "-o %s/%s/as.%s.txt") % (to_dl[clade], ref, clade, clade)
            print(cstr)
            cc(cstr, shell=True)
        to_dl[clade] = parse_assembly(summary_path, cladeidmap)
        # The context manager shuts the worker processes down once this
        # clade is done, instead of leaking one pool per clade.
        with multiprocessing.Pool(args.threads) as spoool:
            # First pass: validate whatever is already on disk (invalid
            # files are removed by the checker) ...
            spoool.map(check_path_lazy if args.lazy else check_path,
                       ("/".join([ref, clade, s.split("/")[-1]]) for
                        s in to_dl[clade]))
            # ... then download everything that is now missing.
            cstrs = [("curl %s -o %s/%s/%s" %
                      (s, ref, clade, s.split("/")[-1])) for
                     s in to_dl[clade] if not os.path.isfile(
                         "%s/%s/%s" % (ref, clade, s.split("/")[-1]))]
            # If nodes.dmp hasn't been downloaded, grab it.
            if not os.path.isfile(nodes_path):
                cstrs.append(taxdump_cstr)
            spoool.map(retry_cc, ((cs, args.die) for cs in cstrs))
        # Replace pathnames with seqids
        for fn in list(cladeidmap.keys()):
            try:
                cladeidmap[xfirstline("/".join(
                    [ref, clade, fn]
                )).decode().split()[0][1:]] = cladeidmap[fn]
                del cladeidmap[fn]
            except FileNotFoundError:
                # A file that could not be downloaded keeps its path as key
                # unless the caller asked to die on failures.
                if args.die:
                    raise
        nameidmap.update(cladeidmap)
    print("Done with all clades", file=sys.stderr)
    with open(ref + "/" + args.idmap, "w") as f:
        fw = f.write
        for k, v in nameidmap.items():
            fw(k + "\t" + str(v) + "\n")
    return ExitCodes.EXIT_SUCCESS
Example #11
0
def check_path(fn, lazy=False):
    """Delete fn if it exists but does not pass is_valid_gzip.

    lazy is forwarded to is_valid_gzip. Nothing happens when fn is not an
    existing regular file. Always returns None.
    """
    print("Checking path " + fn)
    if not os.path.isfile(fn):
        return
    if is_valid_gzip(fn, lazy=lazy):
        return
    # Invalid file: remove it.
    cc("rm " + fn, shell=True)
Example #12
0
def set_clipboard(test_str):
    """Run the CB command with test_str as its only argument."""
    command = [CB, test_str]
    cc(command)
Example #13
0
def application(environ, start_response):
    """WSGI entry point: route the request path to one of the site's pages.

    A request counts as authenticated (ItsMe) when its "session" cookie has
    the value 'ItsMe' or 'itsme'. Routes:

    /                 authenticated: HTML listing of the files in the xml
                      data directory.
    /login            unauthenticated: a POST with a known name sets the
                      session cookie and redirects; anything else shows the
                      login form.
    /nextgp           authenticated: plain-text mcal().nextgptext.
    /report           authenticated: append the request body to report.log,
                      or return the log (newest line first) when there is no
                      usable body.
    /xml/<file>       authenticated: contents of a listed data file.
    /pls/<show>.pls   playlist for a known show; media players identified
                      by User-Agent may use HTTP Basic credentials instead
                      of the cookie.
    /pls/<show>.txt   authenticated: the same playlist as plain text.
    /daily, /hourly   authenticated: run the matching cron runner script.
    /logout           expire the session cookie and redirect to /login.

    Every other request is answered with a redirect. Returns the response
    body as a list containing one bytes object.
    """
    ItsMe = False
    xiia = False
    auth = False
    vlc = False
    response_body = None
    path = os.path.normpath(environ['PATH_INFO'])
    files = sorted(os.listdir(os.environ['OPENSHIFT_DATA_DIR'] + 'xml'), key=lambda x: (x.split('.')[-1], x.lower()))
    shows = getpls(None).allpro
    redirect = None

    if 'HTTP_COOKIE' in environ:
        rcookie = SimpleCookie(environ['HTTP_COOKIE'])
        # The membership test must guard both comparisons; with the former
        # "a and b or c" form a cookie header without "session" raised KeyError.
        if 'session' in rcookie and rcookie['session'].value in ('ItsMe', 'itsme'):
            ItsMe = True

    if 'HTTP_USER_AGENT' in environ:
        agent = environ['HTTP_USER_AGENT']
        # NOTE(review): hard-coded Basic credentials; consider moving them
        # out of the source.
        authorized = environ.get('HTTP_AUTHORIZATION', '').split(' ')[-1] == 'cGktdG9uOmVsY2Fsb3JldA=='
        if 'Dalvik/' in agent or 'Lavf/' in agent:
            xiia = True
            auth = authorized
        elif 'LibVL' in agent:
            xiia = True
            vlc = authorized

    if 'QUERY_STRING' in environ:
        if environ['QUERY_STRING'].startswith('redirect='):
            redirect = os.path.normpath(environ['QUERY_STRING'])

    if path == '/' and ItsMe is True:
        response_body = ['<tr><td style="text-align:left;"><a href="/xml/{}" download>{}</a></td><td style="text-align:right;">{} kB</td><td style="text-align:right;">{}</td></tr>'.format(f, f, round(os.stat(os.environ['OPENSHIFT_DATA_DIR'] + 'xml/' + f).st_size / 1024, 1), strftime('%-d/%m at %H:%M', localtime(os.stat(os.environ['OPENSHIFT_DATA_DIR'] + 'xml/' + f).st_mtime))) for f in files if not f.startswith('.')]
        response_body.append('''<tr><td style="text-align:center;padding-top:25px;"><button onclick="go('/daily');">Daily</button></td><td></td><td style="text-align:center;padding-top:25px;"><button onclick="go('/hourly');">Hourly</button></td></tr></table></center><script type="text/javascript">function changetext(text){over=document.querySelector("#over");document.querySelector("#result").textContent=text;setTimeout(function(){over.style.display="none";location.reload();},2e3);}function go(cual){document.querySelector("#over").style.display="block";var xmlhttp=new XMLHttpRequest();xmlhttp.open("GET",cual);xmlhttp.onreadystatechange=function(){if(xmlhttp.readyState==4&&xmlhttp.status==200){changetext(xmlhttp.responseText);}else{changetext(xmlhttp.statusText+" "+xmlhttp.status);}};xmlhttp.send(null);}</script></body></html>''')
        response_body.insert(0, '<!DOCTYPE html><html><head><meta content="charset=UTF-8"/><title>pi-ton</title></head><style>td {padding: 3px;}</style><body><center><div id="over"style="display:none;position:fixed;top:0%;left:0%;width:100%;height:100%;background-color:black;-moz-opacity:0.8;opacity:.80;filter:alpha(opacity=80);"><p id="result"style="color:red;margin-top:20%;font-weight:bolder;font-size:25px;">...</p></div><table style="margin-top:8%;"><th>Archivo</th><th>TamaƱo</th><th style="width:150px;text-align: right;">Fecha modif.</th>')
        response_body = ''.join(response_body)
        ctype = 'text/html; charset=UTF-8'
    elif path == '/login' and ItsMe is False:
        try:
            length = int(environ['CONTENT_LENGTH'])
            pwd = environ['wsgi.input'].read(length).decode().replace('session=', '')
            if pwd in ('ItsMe', 'itsme', 'malonso'):
                cookie = SimpleCookie()
                cookie['session'] = pwd
                cookie['session']['path'] = '/'
                cookie['session']['max-age'] = '864000'
                cookieheaders = ('Set-Cookie', cookie['session'].OutputString())
                if pwd == 'malonso':
                    response_headers = [cookieheaders, ('Location', '/nextgp')]
                elif redirect is None or redirect == '/':
                    response_headers = [cookieheaders, ('Location', '/')]
                else:
                    # NOTE(review): the redirect target comes straight from
                    # the query string (open redirect) -- consider
                    # restricting it to local paths.
                    response_headers = [cookieheaders, ('Location', '{}'.format(parse_qs(redirect)['redirect'][0]))]
                start_response('302 Found', response_headers)
                return [b'1']
            # Unknown name: fall through to the login form below.
            raise Exception
        except Exception:
            # No body, unreadable body or unknown name: show the login form.
            response_body = '''<!DOCTYPE html><html><head><meta content="charset=UTF-8"/><title>pi-ton</title></head><body><center><form action=""method="post"><input name="session"type="text"size="10"placeholder="And you are...?"style="margin-top:20%;text-align:center"autofocus required><input type="submit"value="Submit"style="display:none"></form></center></body></html>'''
            ctype = 'text/html; charset=UTF-8'
    elif path == '/nextgp' and ItsMe is True:
        response_body = mcal().nextgptext
        ctype = 'text/plain; charset=UTF-8'
    elif path == '/report' and ItsMe is True:
        log_path = os.environ['OPENSHIFT_LOG_DIR'] + 'report.log'
        try:
            length = int(environ['CONTENT_LENGTH'])
            with open(log_path, 'a') as w:
                w.write(environ['wsgi.input'].read(length).decode() + '\n')
            response_body = 'ok'
        except Exception:
            # No usable request body: return the log instead, newest first.
            with open(log_path, 'r') as r:
                response_body = ''.join(reversed(r.readlines()))
        ctype = 'text/plain; charset=UTF-8'
    elif path.startswith('/xml/') and path.split('/')[-1] in files and ItsMe is True:
        with open(os.environ['OPENSHIFT_DATA_DIR'] + 'xml/' + path.split('/')[-1], 'r') as r:
            response_body = r.read()
        ctypes = {'json': 'application/json; charset=UTF-8', 'xml': 'application/xml; charset=UTF-8'}
        # Listed files may carry other extensions; serve those as plain
        # text instead of failing with KeyError.
        ctype = ctypes.get(path.split('.')[-1], 'text/plain; charset=UTF-8')
    elif path.startswith('/pls/') and path.endswith('.pls') and path.split('/')[-1].replace('.pls', '') in shows:
        if ItsMe is True:
            ctype = 'audio/x-scpls'
            response_body = getpls(path.split('/')[-1].replace('.pls', '')).joinedpls
        elif xiia is True:
            if auth is True:
                # Redirect straight to the first entry of the playlist.
                location = getpls(path.split('/')[-1].replace('.pls', '')).joinedpls.split('\n')[1].replace('File1=', '')
                start_response('302 Found', [('Location', location)])
                return [b'1']
            elif vlc is True:
                response_body = getpls(path.split('/')[-1].replace('.pls', '')).joinedpls
                start_response('200 OK', [('Content-Type', 'audio/x-scpls')])
                return [response_body.encode()]
            else:
                response_body = '''<!DOCTYPE html><html><head><meta content="charset=UTF-8"/><title>pi-ton</title></head><body><center><form action="/login"method="post"><input name="session"type="text"size="10"placeholder="And you are...?"style="margin-top:20%;text-align:center"autofocus required><input type="submit"value="Submit"style="display:none"></form></center></body></html>'''
                response_headers = [('content-type', 'text/html; charset=UTF-8'), ('content-length', str(len(response_body.encode('utf8')))), ('WWW-Authenticate', 'Basic realm="pls@pi-ton"')]
                start_response('401 Unauthorized', response_headers)
                return [response_body.encode('utf8')]
        else:
            if redirect is None:
                start_response('302 Found', [('Location', '/')])
            else:
                start_response('302 Found', [('Location', '/login?redirect={}'.format(path))])
            return [b'1']
    elif path.startswith('/pls/') and path.endswith('.txt') and path.split('/')[-1].replace('.txt', '') in shows:
        if ItsMe is True:
            ctype = 'text/plain; charset=UTF-8'
            response_body = getpls(path.split('/')[-1].replace('.txt', '')).joinedpls
        else:
            if redirect is None:
                start_response('302 Found', [('Location', '/')])
            else:
                start_response('302 Found', [('Location', '/login?redirect={}'.format(path))])
            return [b'1']
    elif path in ('/daily', '/hourly') and ItsMe is True:
        # Both paths require authentication; the former "a or b and c" form
        # let anyone trigger /daily.
        # NOTE(review): if cc raises on a non-zero exit status the 'fail'
        # answer below is never produced -- confirm what cc is bound to.
        sp = cc(['sh', './app-root/repo/.openshift/cron/{}/runner'.format(path.replace('/', '')), 'echo'])
        response_body = 'fail'
        if sp == 0:
            response_body = 'ok'
        ctype = 'text/plain; charset=UTF-8'
    elif path == '/logout':
        if 'HTTP_COOKIE' in environ:
            dcookie = SimpleCookie(environ['HTTP_COOKIE'])
            if 'session' in dcookie and dcookie['session'].value in ('ItsMe', 'itsme', 'malonso'):
                # The attribute value is the date alone; the cookie module
                # adds the "expires=" prefix itself.
                dcookie['session']['expires'] = 'Thu, 01 Jan 1970 00:00:00 GMT'
                cookieheaders = ('Set-Cookie', dcookie['session'].OutputString())
                response_headers = [cookieheaders, ('Location', '/login')]
                start_response('302 Found', response_headers)
                return [b'1']
        # No session cookie to clear: just send the client to the login
        # page (this path used to fall through with no content type set).
        start_response('302 Found', [('Location', '/login')])
        return [b'1']
    else:
        if ItsMe is True:
            if redirect is None:
                start_response('302 Found', [('Location', '/')])
            else:
                start_response('302 Found', [('Location', '{}'.format(parse_qs(redirect)['redirect'][0]))])
            return [b'1']
        if path == '/':
            start_response('302 Found', [('Location', '/login')])
        else:
            start_response('302 Found', [('Location', '/login?redirect={}'.format(path))])
        return [b'1']

    # Every branch that reaches this point has set response_body and ctype
    # and is answered with 200.
    status = '200 OK'

    if ctype == 'audio/x-scpls':
        # Playlists are sent without a Content-Length header.
        response_headers = [('Content-Type', ctype)]
        start_response(status, response_headers)
        return [response_body.encode()]
    response_headers = [('Content-Type', ctype), ('Content-Length', str(len(response_body.encode('utf8'))))]
    start_response(status, response_headers)
    return [response_body.encode('utf8')]
Example #14
0
def fetch_i100(folder):
    """Mirror the .gz files of the simulated i100 data set into folder/i100.

    The target directory is created when it does not exist; the download
    itself is done by wget.
    """
    i100_dir = folder + "/i100"
    already_there = os.path.isdir(i100_dir)
    if not already_there:
        os.makedirs(i100_dir)
    wget_template = ("wget -N -m -np -nd -e robots=off -P %s/i100 -A .gz "
                     "http://www.bork.embl.de/~mende/simulated_data/")
    cc(wget_template % folder, shell=True)