Example #1
def zippy_share(url: str) -> str:
    """ ZippyShare direct links generator
    Based on https://github.com/KenHV/Mirror-Bot """
    link = re.findall("https:/.(.*?).zippyshare", url)[0]
    response_content = (requests.get(url)).content
    bs_obj = BeautifulSoup(response_content, "lxml")

    try:
        js_script = bs_obj.find("div", {
            "class": "center",
        }).find_all("script")[1]
    except:
        js_script = bs_obj.find("div", {
            "class": "right",
        }).find_all("script")[0]

    js_content = re.findall(r'\.href.=."/(.*?)";', str(js_script))
    js_content = 'var x = "/' + js_content[0] + '"'

    evaljs = EvalJs()
    setattr(evaljs, "x", None)
    evaljs.execute(js_content)
    js_content = getattr(evaljs, "x")

    return f"https://{link}.zippyshare.com{js_content}"
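
Most of the examples in this section rely on the same js2py idiom: execute a small piece of JavaScript with EvalJs, then read the resulting variable back through plain attribute access. A minimal, self-contained sketch of that idiom (the snippet and variable name below are illustrative, not taken from any of the projects listed here):

from js2py import EvalJs

# Evaluate a tiny JS snippet, then read the variable it defined.
ctx = EvalJs()
ctx.execute('var x = "/d/" + (700 + 42) + "/file.zip";')
print(ctx.x)  # -> /d/742/file.zip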
Example #2
def zippy_share(url: str) -> str:
    """ ZippyShare direct links generator
    Based on https://github.com/KenHV/Mirror-Bot
             https://github.com/jovanzers/WinTenCermin """
    try:
        link = re.findall(r'\bhttps?://.*zippyshare\.com\S+', url)[0]
    except IndexError:
        raise DirectDownloadLinkException("No Zippyshare links found")
    try:
        base_url = re.search('http.+.zippyshare.com', link).group()
        response = requests.get(link).content
        pages = BeautifulSoup(response, "lxml")
        try:
            js_script = pages.find("div", {"class": "center"}).find_all("script")[1]
        except IndexError:
            js_script = pages.find("div", {"class": "right"}).find_all("script")[0]
        js_content = re.findall(r'\.href.=."/(.*?)";', str(js_script))
        js_content = 'var x = "/' + js_content[0] + '"'
        evaljs = EvalJs()
        setattr(evaljs, "x", None)
        evaljs.execute(js_content)
        js_content = getattr(evaljs, "x")
        return base_url + js_content
    except IndexError:
        raise DirectDownloadLinkException("ERROR: Can't find download button")
Example #3
 def deciphered_signature(self, signature=None, algo_js=None):
     # Returns the final deciphered signature by executing the JavaScript
     algo_js = algo_js.replace(
         re.search(r'var output.*?"(.*?)"', algo_js).groups()[0], signature)
     context = EvalJs()
     context.execute(algo_js)
     return context.output
Example #4
async def zippy_share(url: str) -> str:
    link = re.findall("https:/.(.*?).zippyshare", url)[0]
    response_content = (requests.get(url)).content
    bs_obj = BeautifulSoup(response_content, "lxml")

    try:
        js_script = bs_obj.find("div", {
            "class": "center",
        }).find_all("script")[1]
    except BaseException:
        js_script = bs_obj.find("div", {
            "class": "right",
        }).find_all("script")[0]

    js_content = re.findall(r'\.href.=."/(.*?)";', str(js_script))
    js_content = 'var x = "/' + js_content[0] + '"'

    evaljs = EvalJs()
    setattr(evaljs, "x", None)
    evaljs.execute(js_content)
    js_content = getattr(evaljs, "x")

    dl_url = f"https://{link}.zippyshare.com{js_content}"
    file_name = basename(dl_url)

    return f"[{urllib.parse.unquote_plus(file_name)}]({dl_url})"
Example #5
    def getTk(self):

        with open('.\\config\\GoogleJS.js', encoding='utf8') as f:
            js_data = f.read()

        context = EvalJs()
        context.execute(js_data)
        tk = context.TL(self.text)

        return tk
Example #6
    def getTk(self, text):

        with open(this_file_dir + '/GoogleJS.js', encoding='utf8') as f:
            js_data = f.read()

        context = EvalJs()
        context.execute(js_data)
        tk = context.TL(text)

        return tk
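
Both getTk variants load Google's token JavaScript from disk and then call the JS function TL() directly from Python: functions defined by an executed script become callable attributes of the EvalJs context. A minimal sketch of that call pattern (the function below is made up for illustration, not Google's TL):

from js2py import EvalJs

# A JS function defined via execute() can be called like a Python method.
ctx = EvalJs()
ctx.execute('function tk(text) { return "tk_" + text.length; }')
print(ctx.tk("hello"))  # -> tk_5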
Example #7
 def create_js_context(self):
     with self._context_lock:
         if self._context is None:
             registered_context = self._registered_context
             registered_context[self.MVAR_VARS] = {}
             self._context = EvalJs(context=registered_context,
                                    enable_require=True)
             if self._prepare_script:
                 for script in self._prepare_script:
                     self._context.execute(script)
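
create_js_context builds the EvalJs instance from a prepared dictionary, so every registered Python callable becomes a global inside the JavaScript environment (and enable_require=True additionally lets the scripts require() modules). A minimal sketch of that context injection, with a made-up helper name:

from js2py import EvalJs

# Entries of the context dict become globals in the JS environment,
# so the executed script can call back into Python.
ctx = EvalJs(context={'py_upper': lambda s: str(s).upper()})
ctx.execute('var shout = py_upper("hello from js");')
print(ctx.shout)  # -> HELLO FROM JS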
Example #8
    def get_episode_links(self, item):
        url = f"https://manhua.dmzj.com/{item['path']}#@page=1"
        res = r.get(url)

        # The data is obfuscated by JavaScript, so evaluate it with the built-in JS engine
        soup = bs4.BeautifulSoup(res.text, features="lxml")
        head_script = soup.head.script.string
        js = EvalJs()
        js.execute(head_script)

        # Extract the information from the result of evaluating the JS
        item["links"] = [HOST + i for i in json.loads(js.pages)]
        item["sname"] = f"{js.g_comic_name}"
        item["chapter"] = f"{js.g_chapter_name}"
Example #9
def decrypt(data):
    if six.PY2:
        with open(os.path.join(os.path.dirname(__file__),
                               "primewire.js")) as f:
            code = f.read()
        jscntx = EvalJs()
        jscntx.execute(code)
        return jscntx.decode(data)
    else:
        from bfish import Cipher
        from textwrap import wrap
        e = data[:9].encode()
        t = data[9:]
        bd = base64.b64decode(t)
        bf = Cipher(bytearray(e))
        out = b""
        for chunk in bf.decrypt_ecb(bd):
            out += chunk
        return wrap(out.decode(), 5)
Example #10
import os
import sys
import tqdm
from js2py import EvalJs
from multiprocessing import Pool
from collections import defaultdict

#gbids_to_coord = defaultdict(list)

fasta_file = "/mnt/gnpn/gnpn/projects/orphanpks/TargetMining/Blast/blast_results_seqs/blast_results.KS.fasta.cleanName.cdhit.99"
# Head of cdhit file:
# >AVFP01000283.1__724_1992_Microbial_mat_metagenome_scaffold_282__whole_genome_shotgun_sequence_0_1_9914_7e-169
# IAIIGMSGIFPDAEDVQTYWNNLCQGR
# >AM746676___5843905_5845200__0_-1_13033779_0.0

count = 0

context = EvalJs()

# Output file with number of gbids on which antismash was run
outfilefaa = "sequences.faa.21k.coord.fasta"
ff = open(outfilefaa, "w")

antismash_dir = "antismash_output_assemblies_all/"
for gbidfull in tqdm.tqdm(os.listdir(antismash_dir)):
    #if not gbidfull.startswith("CP012600"):
    #    continue
    #print gbidfull

    if len(gbidfull.split("_")) > 2:
        gbid = gbidfull.rsplit("_", 2)[0]
        blast_coord_start, blast_coord_end = gbidfull.split("_")[1:]
        #print "here", gbid, blast_coord_start, blast_coord_end
Example #11
#!/usr/bin/env python
import json
import io
from js2py import EvalJs

js_file = io.open('lib/index.js', 'r', encoding="utf-8")
js_code = js_file.read()
context = EvalJs()
context.execute(js_code)
create_doc_json = context.prosemirror.create_doc_json
transform_doc_json = context.prosemirror.transform_doc_json

spec_data_json = '{"nodes":{"content":["doc",{"content":"article","selectable":false},"article",{"defining":true,"content":"title subtitle authors abstract keywords body","selectable":false,"allowGapCursor":false,"attrs":{"papersize":{"default":"A4"},"citationstyle":{"default":""},"documentstyle":{"default":""},"language":{"default":"en-US"},"tracked":{"default":false}},"parseDOM":[{"tag":"div.article"}]},"title",{"content":"text*","marks":"annotation track","group":"part","defining":true,"parseDOM":[{"tag":"div.article-title"}]},"subtitle",{"content":"text*","marks":"annotation track","group":"part","defining":true,"attrs":{"hidden":{"default":true}},"parseDOM":[{"tag":"div.article-subtitle"}]},"authors",{"content":"author*","marks":"annotation track","group":"part","defining":true,"attrs":{"hidden":{"default":true}},"parseDOM":[{"tag":"div.article-authors"}]},"author",{"inline":true,"draggable":true,"attrs":{"firstname":{"default":false},"lastname":{"default":false},"email":{"default":false},"institution":{"default":false}},"parseDOM":[{"tag":"span.author"}]},"abstract",{"content":"(block | table_block)+","group":"part","marks":"annotation","defining":true,"attrs":{"hidden":{"default":true}},"parseDOM":[{"tag":"div.article-abstract"}]},"keywords",{"content":"keyword*","marks":"annotation track","group":"part","defining":true,"attrs":{"hidden":{"default":true}},"parseDOM":[{"tag":"div.article-keywords"}]},"keyword",{"inline":true,"draggable":true,"attrs":{"keyword":{"default":""}},"parseDOM":[{"tag":"span.keyword"}]},"body",{"content":"(block | table_block)+","group":"part","marks":"annotation track","defining":true,"parseDOM":[{"tag":"div.article-body"}]},"paragraph",{"group":"block","content":"inline*","attrs":{"track":{"default":[]}},"parseDOM":[{"tag":"p"}]},"blockquote",{"content":"block+","group":"block","attrs":{"track":{"default":[]}},"marks":"annotation","defining":true,"parseDOM":[{"tag":"blockquote"}]},"horizontal_rule",{"group":"block","attrs":{"track":{"default":[]}},"parseDOM":[{"tag":"hr"}]},"figure",{"group":"block","attrs":{"equation":{"default":""},"image":{"default":false},"figureCategory":{"default":""},"caption":{"default":""},"id":{"default":false},"track":{"default":[]}},"parseDOM":[{"tag":"figure"}]},"heading",{"group":"block","content":"inline*","marks":"_","defining":true,"attrs":{"level":{"default":1},"id":{"default":false},"track":{"default":[]}},"parseDOM":[{"tag":"h1"},{"tag":"h2"},{"tag":"h3"},{"tag":"h4"},{"tag":"h5"},{"tag":"h6"}]},"code_block",{"content":"text*","marks":"annotation track","group":"block","code":true,"defining":true,"attrs":{"track":{"default":[]}},"parseDOM":[{"tag":"pre","preserveWhitespace":"full"}]},"text",{"group":"inline"},"hard_break",{"inline":true,"group":"inline","selectable":false,"parseDOM":[{"tag":"br"}]},"citation",{"inline":true,"group":"inline","attrs":{"format":{"default":"autocite"},"references":{"default":[]}},"parseDOM":[{"tag":"span.citation"}]},"equation",{"inline":true,"group":"inline","attrs":{"equation":{"default":""}},"parseDOM":[{"tag":"span.equation"}]},"footnote",{"inline":true,"group":"inline","attrs":{"footnote":{"default":[{"type":"paragraph"}]}},"parseDOM":[{"tag":"span.footnote-marker[data-footnote]"}]},"ordered_list",{"group":"block","content":"list_item+","attrs":{"order":{"default":1},"track":{"default":[]}},"parseDOM":[{"tag":"ol"}]},"bullet_list",{"group":"block","content":"list_item+","attrs":{"track":{"default":[]}},"parseDOM":[{"tag":"ul"}]},"list_item",{"content":"block+","marks":"annotation","attrs":{"track":{"default":[]}},"parseDOM":[{"tag":"li"}],"defining":true},"table",{"content":"table_row+","tableRole":"table","isolating":true,"group":"table_block","parseDOM":[{"tag":"table"}],"attrs":{"track":{"default":[]}}},"table_row",{"content":"(table_cell | table_header)*","tableRole":"row","parseDOM":[{"tag":"tr"}]},"table_cell",{"marks":"annotation","content":"block+","attrs":{"colspan":{"default":1},"rowspan":{"default":1},"colwidth":{"default":null}},"tableRole":"cell","isolating":true,"parseDOM":[{"tag":"td"}]},"table_header",{"content":"block+","attrs":{"colspan":{"default":1},"rowspan":{"default":1},"colwidth":{"default":null}},"tableRole":"header_cell","isolating":true,"parseDOM":[{"tag":"th"}]}]},"marks":{"content":["em",{"parseDOM":[{"tag":"i"},{"tag":"em"},{"style":"font-style=italic"}]},"strong",{"parseDOM":[{"tag":"strong"},{"tag":"b"},{"style":"font-weight"}]},"link",{"attrs":{"href":{},"title":{"default":null}},"inclusive":false,"parseDOM":[{"tag":"a[href]"}]},"code",{"parseDOM":[{"tag":"code"}]},"comment",{"attrs":{"id":{}},"inclusive":false,"excludes":"","group":"annotation","parseDOM":[{"tag":"span.comment[data-id]"}]},"annotation_tag",{"attrs":{"type":{"default":""},"key":{"default":""},"value":{"default":""}},"inclusive":false,"excludes":"","group":"annotation","parseDOM":[{"tag":"span.annotation-tag[data-type]"}]},"anchor",{"attrs":{"id":{"default":false}},"inclusive":false,"group":"annotation","parseDOM":[{"tag":"span.anchor[data-id]"}]},"deletion",{"attrs":{"user":{"default":0},"username":{"default":""},"date":{"default":0}},"inclusive":false,"group":"track","parseDOM":[{"tag":"span.deletion"}]},"insertion",{"attrs":{"user":{"default":0},"username":{"default":""},"date":{"default":0},"approved":{"default":true}},"inclusive":false,"group":"track","parseDOM":[{"tag":"span.insertion"},{"tag":"span.approved-insertion"}]},"format_change",{"attrs":{"user":{"default":0},"username":{"default":""},"date":{"default":0},"before":{"default":[]},"after":{"default":[]}},"inclusive":false,"group":"track","parseDOM":[{"tag":"span.format-change"}]}]}}'

doc_data_json = '{"type":"doc","content":[{"type":"article","attrs":{"papersize":"A4","citationstyle":"apa","documentstyle":"elephant","language":"en-US","tracked":false},"content":[{"type":"title","content":[{"type":"text","marks":[{"type":"insertion","attrs":{"user":1,"username":"******","date":25710790,"approved":true}}],"text":"testing"}]},{"type":"subtitle","attrs":{"hidden":true}},{"type":"authors","attrs":{"hidden":true}},{"type":"abstract","attrs":{"hidden":true},"content":[{"type":"paragraph","attrs":{"track":[]}}]},{"type":"keywords","attrs":{"hidden":true}},{"type":"body","content":[{"type":"paragraph","attrs":{"track":[]},"content":[{"type":"text","marks":[{"type":"insertion","attrs":{"user":1,"username":"******","date":25710790,"approved":true}}],"text":"the body"}]}]}]}]}'

doc = create_doc_json(doc_data_json, spec_data_json)

step_data_json = '[{"stepType":"replace","from":27,"to":27,"slice":{"content":[{"type":"text","marks":[{"type":"insertion","attrs":{"user":1,"username":"******","date":25710790,"approved":true}}],"text":"X"}]}},{"stepType":"addMark","mark":{"type":"insertion","attrs":{"user":1,"username":"******","date":25716230,"approved":true}},"from":27,"to":28}]'

import time
a = range(100)
start = time.time()

for i in a:
    new_doc = transform_doc_json(step_data_json, doc)
new_doc.toJSON()

stop = time.time()

print((stop - start) / 100)
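
Example #11 reaches into its bundled library through nested attribute access (context.prosemirror.create_doc_json): objects created by the executed script come back as wrappers whose properties, including functions, stay reachable from Python. A small illustration with a made-up object (exact return types may differ; js2py can hand numbers back as floats):

from js2py import EvalJs

# Properties of a JS object remain accessible from Python,
# and JS functions stored on it can be called directly.
ctx = EvalJs()
ctx.execute('var lib = { version: "1.0", double: function (n) { return n * 2; } };')
print(ctx.lib.version)     # -> 1.0
print(ctx.lib.double(21))  # -> 42 (possibly as a float)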
Example #12
class PyJsEngineBase:
    DEFAULT_ENCODING = 'utf-8'
    common_bufsize = 1024

    JS_EXT_NAME = ".js"
    EXT_NAME_SET = {'', JS_EXT_NAME}

    MVAR_PREFIX = "__"
    MVAR_VARS = "__vars__"
    MVAR_SCRIPT_NAME = "__script_name__"
    MVAR_WORKING_DIR = "__working_dir__"

    def __init__(self, logger=None, **kwargs):
        self._logger = logger or get_logger()

        self._script = None
        self._registered_context = {}
        self._prepare_script = []
        self._path = list()

        self._context = None
        self._context_lock = Lock()

        self.MVAR_SET = {
            self.MVAR_SCRIPT_NAME,
            self.MVAR_WORKING_DIR,
        }

    @property
    def path(self):
        return self._path

    def add_to_path(self, path):
        if isinstance(path, str):
            self._path.insert(0, path)

    @property
    def script(self):
        return self._script

    @property
    def context(self):
        with self._context_lock:
            return self._context

    def do_before(self, jskwargs, *args):
        pass

    def do_after(self, jskwargs, *args):
        pass

    def wrapped_method(self, jskwargs, *args, func=None):
        self.do_before(jskwargs, *args)
        result = None
        if func is not None:
            result = func(jskwargs, *args)
        self.do_after(jskwargs, *args)
        return result

    # Register the context
    def register_context(self, context):
        if isinstance(context, dict):
            new_context = copy(context)
            for k, v in context.items():
                if isinstance(v, types.MethodType) and getattr(
                        self, v.__name__):
                    new_context[k] = partial(self.wrapped_method, func=v)
            self._registered_context.update(new_context)

        # Force key names that do not start with MVAR_PREFIX to begin with an uppercase letter
        for k in list(self._registered_context.keys()):
            if isinstance(k, str) and not k.startswith(self.MVAR_PREFIX):
                if k != k.capitalize():
                    v = self._registered_context.pop(k)
                    new_k = k.capitalize()
                    self._registered_context[new_k] = v

    # Clear the prepare-script list
    def clear_prepare_script(self):
        self._prepare_script.clear()

    # Append a script to be pre-executed to the prepare-script list
    def append_prepare_script(self, script):
        self._prepare_script.append(script)

    # Create the JS context object
    def create_js_context(self):
        with self._context_lock:
            if self._context is None:
                registered_context = self._registered_context
                registered_context[self.MVAR_VARS] = {}
                self._context = EvalJs(context=registered_context,
                                       enable_require=True)
                if self._prepare_script:
                    for script in self._prepare_script:
                        self._context.execute(script)

    # Load a script
    def load(self, script):
        if isinstance(script, str):
            self._script = script
        else:
            raise ValueError("Script must be type 'str'!")

    # Load a script (from a file)
    def load_from_file(self, file, encoding=None):
        script = self.read_from_file(file, encoding=encoding)

        # Switch to the directory containing the script file
        try:
            base_name = os.path.basename(file)
            dirname = os.path.abspath(file)
            dirname = os.path.dirname(dirname)
            # os.chdir(dirname)
            # logger.debug(msg='OS change to script''s directory (%s)' % (dirname))
            var_dict = self._registered_context
            var_dict[self.MVAR_SCRIPT_NAME] = os.path.splitext(base_name)[0]
            var_dict[self.MVAR_WORKING_DIR] = dirname
            self._path.clear()
            self.add_to_path(os.path.abspath('.'))  # last: the program directory
            self.add_to_path(dirname)  # second to last: the script directory
        except:
            pass

        self.load(script)

    # Load a script (from a string)
    def load_from_string(self, source):
        script = self.read_from_string(source)
        self.load(script)

    # Read a script file
    def read_from_file(self, file, encoding=None):
        logger = self._logger

        logger.debug(msg="Loading script from file...")
        fp = None
        try:
            if isinstance(file, str):
                c = encoding
                if c is None:
                    # If no encoding was specified, read part of the file first and auto-detect it
                    tmpf = open(file=file, mode='rb')
                    tmps = tmpf.read(self.common_bufsize)
                    tmpchd = crd.detect(tmps)
                    logger.debug(msg="Encoding detected: %s" % (repr(tmpchd)))
                    # Then read the content with codecs using the detected encoding
                    c = tmpchd['encoding']
                fp = cdc.open(file, encoding=c)
            else:
                fp = file

            script = self.read_from_string(fp.read())

            return script
        finally:
            if fp is not None and isinstance(file, str):
                fp.close()

    # Read script data
    def read_from_string(self, source):
        if isinstance(source, str):
            return source
        elif isinstance(source, bytes):
            return source.decode(self.DEFAULT_ENCODING)

        return None

    # Run the script
    def run(self, temp_script=None):
        script = temp_script
        if script is None:
            script = self._script

        self.create_js_context()
        self.context.execute(script)

    def get_vars(self):
        return self.context[self.MVAR_VARS]

    def get_vars_dict(self, vars=None):
        if vars is None:
            # If vars was not given, use the built-in context vars
            vars = self.get_vars()
        if isinstance(vars, JsObjectWrapper):
            # Make sure vars is not a JsObjectWrapper
            vars = vars.to_dict()
        return vars

    # Parse the argument dict (all arguments; type conversion only)
    @staticmethod
    def args_parser_all(jskwargs):
        return jskwargs.to_dict() if isinstance(jskwargs,
                                                JsObjectWrapper) else jskwargs

    # Parse the argument dict, normalizing argument formats and applying defaults according to the rules
    # jskwargs: the argument dict passed from JS; rules: the parsing-rules dict (format: {key: (name, type, default)}; type is one of s string / sr raw string / i integer / r real / b boolean)
    def args_parser(self, jskwargs, rules):
        if not isinstance(rules, dict):
            return None

        if isinstance(jskwargs, JsObjectWrapper):
            # Make sure jskwargs is not a JsObjectWrapper
            jskwargs = jskwargs.to_dict()

        if not isinstance(jskwargs, dict):
            return None

        args = {}
        keys = set(jskwargs.keys())
        for pk, pv in rules.items():
            if isinstance(pk, str) and isinstance(pv,
                                                  tuple) and len(pv) in {2, 3}:
                # pk: key, pn: name, pt: type, pd: default
                lpv = len(pv)
                if lpv == 2:
                    pn, pt = pv
                    pd = None
                    if pk not in keys:
                        continue
                else:
                    pn, pt, pd = pv

                value = jskwargs.get(pk, pd)
                if isinstance(value, bytes):
                    value = value.decode(self.DEFAULT_ENCODING)
                if pt in {'s', 'i', 'r', 'b'}:
                    value = self.var_replacer(value) if isinstance(
                        value, str) else value
                if pt in {'s', 'sr'}:
                    args[pn] = str(value) if value is not None else None
                elif pt in {'i', 'ir'}:
                    args[pn] = (int(eval(str(value))) if value is not None else
                                None) if not isinstance(value, int) else value
                elif pt in {'r', 'rr'}:
                    args[pn] = (eval(str(value)) if value is not None else None
                                ) if not isinstance(value, float) else value
                elif pt in {'b', 'br'}:
                    args[pn] = str(value).lower() == str(True).lower()

        if self.MVAR_VARS not in args:
            try:
                args[self.MVAR_VARS] = self.get_vars_dict()
            except:
                pass

        return args

    # Variable placeholder substitution
    def var_replacer(self,
                     v_str,
                     vars=None,
                     v_prefix=r"$%",
                     v_suffix=r"%$",
                     re_prefix=r"\$\%",
                     re_suffix=r"\%\$"):
        vars = self.get_vars_dict(vars=vars)
        return self.var_replacer_raw(vars,
                                     v_str,
                                     v_prefix=v_prefix,
                                     v_suffix=v_suffix,
                                     re_prefix=re_prefix,
                                     re_suffix=re_suffix)

    # Variable placeholder substitution (raw method)
    @staticmethod
    def var_replacer_raw(var_dict,
                         v_str,
                         v_prefix=r"$%",
                         v_suffix=r"%$",
                         re_prefix=r"\$\%",
                         re_suffix=r"\%\$"):
        keys = re.findall(re_prefix + r"(.+?)" + re_suffix, v_str)
        d_keys = []
        for i in keys:
            value = var_dict.get(i)
            if value is not None:
                d_keys.append(i)

        o_str = deepcopy(v_str)
        for i in d_keys:
            # Note: var_dict.get(i) is cast to str here to handle values in var_dict that are not strings
            o_str = o_str.replace(v_prefix + i + v_suffix,
                                  str(var_dict.get(i)))
        return o_str
Example #13
def get_orfs(gbidfull, antismash_dir, gene_outfile, ks_outfile, stdoutfile):
    filename = os.path.join(antismash_dir, gbidfull, "geneclusters.js")

    if not os.path.exists(filename):
        print "Not found %s" % filename
        return

    gbid, gbid_start, gbid_end = parse_gbidfull(gbidfull)

    # Read antismash json file
    f = open(filename, 'r')
    data_js = f.read()
    context = EvalJs()
    context.execute(data_js)
    geneclusters = context.geneclusters.to_dict()
    details_data = context.details_data.to_dict()

    for cluster_id in geneclusters.iterkeys():
        # Parse only pks clusters:
        clustertype = geneclusters[cluster_id]["type"]
        pkss = ['t1pks', 'transatpks']
        if not any(pks in clustertype for pks in pkss):
            continue

        # Correct gene coord if gbid sequence was split
        antismash_start = int(geneclusters[cluster_id]["start"])
        antismash_end = int(geneclusters[cluster_id]["end"])
        cluster_start = gbid_start + antismash_start
        cluster_end = gbid_start + antismash_end

        print "Cluster ", cluster_id, cluster_start, cluster_end, "....."
        locus_coords = {}
        for orfs in geneclusters[cluster_id]["orfs"]:
            antismash_prot_start = int(orfs["start"])
            antismash_prot_end = int(orfs["end"])
            #print antismash_prot_start, antismash_prot_end
            # Correct gene coord if gbid sequence was split
            prot_start = gbid_start + antismash_prot_start
            prot_end = gbid_start + antismash_prot_end

            locus_tag = orfs["locus_tag"]
            locus_coords[locus_tag] = (prot_start, prot_end)
            description = orfs["description"]
            name1 = description.split("</span><br>")[0]
            name = name1.split("<span class=\"svgene-tooltip-bold\">")[1]
            d = description.split("QUERY=")[1]
            sequence1 = d.split("_LOC=protein")[0]
            sequence = sequence1.split("&LINK")[0]

            # Write protein sequences in a fasta file
            fasta_id = ">%s|%s|%s-%s|%s|%s-%s|%s" % \
                (gbid, cluster_id, cluster_start, cluster_end,
                 clustertype, prot_start, prot_end, locus_tag)
            gene_outfile.write("%s\n%s\n" % (fasta_id, sequence))

        if not details_data:
            print "%s\t%s\tEmpty details_data" % \
                (gbidfull, abs(cluster_end - cluster_start))
            stdoutfile.write("%s\t%s\tEmpty details_data\n" %
                             (gbidfull, abs(cluster_end - cluster_start)))
            continue

        for orfs in details_data[cluster_id]["orfs"]:
            locus_tag = orfs["id"]
            for domain in orfs["domains"]:
                domain_type = domain["type"]
                if domain_type != "PKS_KS":
                    continue
                ks_seq = domain["sequence"]

                ks_start = locus_coords[locus_tag][0] + int(domain["start"])
                ks_end = locus_coords[locus_tag][0] + int(domain["end"])
                print "KS %s %s %s" % (ks_start, ks_end, ks_seq[:20])

                # Write protein sequences in a fasta file
                fasta_id = ">%s|%s|%s-%s|%s|%s-%s|%s|%s" % \
                    (gbid, cluster_id, cluster_start, cluster_end,
                     clustertype, ks_start, ks_end, locus_tag, domain_type)
                ks_outfile.write("%s\n%s\n" % (fasta_id, ks_seq))
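
get_orfs converts the JS objects produced by antiSMASH's geneclusters.js into plain dictionaries with .to_dict(), the usual way to turn the JsObjectWrapper values returned by EvalJs into ordinary Python data. A brief sketch with dummy data (the structure below is illustrative, not real antiSMASH output):

from js2py import EvalJs

# JS objects come back wrapped; to_dict() converts them to plain Python dicts.
ctx = EvalJs()
ctx.execute('var geneclusters = { "cluster-1": { type: "t1pks", start: 100, end: 4200 } };')
clusters = ctx.geneclusters.to_dict()
print(clusters["cluster-1"]["type"])  # -> t1pks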
Example #14
def _download(client, message):
    user_id = message.from_user.id
    if not message.media:
        sent_message = message.reply_text('🕵️**Checking link...**', quote=True)
        if message.command:
            link = message.command[1]
        else:
            link = message.text
        if 'drive.google.com' in link:
            sent_message.edit(Messages.CLONING.format(link))
            LOGGER.info(f'Copy:{user_id}: {link}')
            msg = GoogleDrive(user_id).clone(link)
            sent_message.edit(msg)
        elif 'mediafire.com' in link:
            """ MediaFire """
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            try:
                link = re.findall(r'\bhttps?://.*mediafire\.com\S+', link)[0]
            except IndexError:
                return sent_message.edit(
                    "No MediaFire links found\nMight Be File/Files Deleted. \nOpen The Link And Check"
                )
            page = BeautifulSoup(requests.get(link).content, 'lxml')
            info = page.find('a', {'aria-label': 'Download file'})
            link = info.get('href')
            sent_message.edit(Messages.DOWNLOADING.format(link))
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                # LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
            else:
                sent_message.edit(
                    Messages.DOWNLOAD_ERROR.format(file_path, link))
        elif 'zippyshare.com' in link:
            """ ZippyShare """
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            try:
                link = re.findall(r'\bhttps?://.*zippyshare\.com\S+', link)[0]
            except IndexError:
                raise DirectDownloadLinkException("No Zippyshare links found")
            try:
                base_url = re.search('http.+.zippyshare.com', link).group()
                response = requests.get(link).content
                pages = BeautifulSoup(response, "lxml")
                try:
                    js_script = pages.find("div", {
                        "class": "center"
                    }).find_all("script")[1]
                except IndexError:
                    js_script = pages.find("div", {
                        "class": "right"
                    }).find_all("script")[0]
                js_content = re.findall(r'\.href.=."/(.*?)";', str(js_script))
                js_content = 'var x = "/' + js_content[0] + '"'
                evaljs = EvalJs()
                setattr(evaljs, "x", None)
                evaljs.execute(js_content)
                js_content = getattr(evaljs, "x")
                link = base_url + js_content
            except IndexError:
                return sent_message.edit("ERROR: Can't find download button")
            sent_message.edit(Messages.DOWNLOADING.format(link))
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                # LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
            else:
                sent_message.edit(
                    Messages.DOWNLOAD_ERROR.format(file_path, link))
        elif 'anonfiles.com' in link:
            """ Anonfiles """
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            bypasser = lk21.Bypass()
            link = bypasser.bypass_anonfiles(link)
            sent_message.edit(Messages.DOWNLOADING.format(link))
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                # LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
            else:
                sent_message.edit(
                    Messages.DOWNLOAD_ERROR.format(file_path, link))
        elif 'bayfiles.com' in link:
            """ Bayfiles """
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            bypasser = lk21.Bypass()
            link = bypasser.bypass_anonfiles(link)
            sent_message.edit(Messages.DOWNLOADING.format(link))
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                # LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
        elif 'racaty.net' in link:
            """ Racaty """
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            dl_url = ''
            try:
                link = re.findall(r'\bhttps?://.*racaty\.net\S+', link)[0]
            except IndexError:
                raise DirectDownloadLinkException("No Racaty links found")
            scraper = cfscrape.create_scraper()
            r = scraper.get(link)
            soup = BeautifulSoup(r.text, "lxml")
            op = soup.find("input", {"name": "op"})["value"]
            ids = soup.find("input", {"name": "id"})["value"]
            rpost = scraper.post(link, data={"op": op, "id": ids})
            rsoup = BeautifulSoup(rpost.text, "lxml")
            dl_url = rsoup.find("a",
                                {"id": "uniqueExpirylink"})["href"].replace(
                                    " ", "%20")
            link = dl_url
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
            else:
                sent_message.edit(
                    Messages.DOWNLOAD_ERROR.format(file_path, link))

        # elif '1fichier.com' in link:
        elif 'ouo.press' in link or 'ouo.io' in link:
            """ Ouo Bypass """
            bypasser = lk21.Bypass()
            link = bypasser.bypass_ouo(link)
            sent_message.edit(
                f"Bypassed The Ouo Link That U Provided.\n\nLink = `{link}`")

        # elif is_gdtot_link(link):

        else:
            if '|' in link:
                link, filename = link.split('|')
                link = link.strip()
                filename = filename.strip()
                dl_path = os.path.join(
                    f'{Config.DOWNLOAD_DIRECTORY}/{filename}')
            else:
                link = link.strip()
                filename = os.path.basename(link)
                dl_path = Config.DOWNLOAD_DIRECTORY
            LOGGER.info(f'Download:{user_id}: {link}')
            sent_message.edit(Messages.DOWNLOADING.format(link))
            result, file_path = download_file(link, dl_path)
            if result == True:
                sent_message.edit(
                    Messages.DOWNLOADED_SUCCESSFULLY.format(
                        os.path.basename(file_path),
                        humanbytes(os.path.getsize(file_path))))
                msg = GoogleDrive(user_id).upload_file(file_path)
                sent_message.edit(msg)
                LOGGER.info(f'Deleting: {file_path}')
                os.remove(file_path)
            else:
                sent_message.edit(
                    Messages.DOWNLOAD_ERROR.format(file_path, link))