Example #1
def test_isotime_iso():
    iso_date = now_as_iso()
    iso_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z')

    assert isinstance(iso_date, str)
    assert iso_format.match(iso_date)
    assert epoch_to_iso(iso_to_epoch(iso_date)) == iso_date
    assert iso_date == epoch_to_iso(local_to_epoch(epoch_to_local(iso_to_epoch(iso_date))))
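The test above only asserts round-trip behaviour. As a minimal standard-library sketch (not the library's implementation), the conversions it implies look like the following, with the format string taken from the regex in the test and the helper names (`iso_to_epoch_sketch`, `epoch_to_iso_sketch`) purely illustrative:

from datetime import datetime, timezone

ISO_FMT = '%Y-%m-%dT%H:%M:%S.%fZ'

def iso_to_epoch_sketch(iso_date: str) -> float:
    # Parse the UTC ISO string and return a Unix epoch float.
    return datetime.strptime(iso_date, ISO_FMT).replace(tzinfo=timezone.utc).timestamp()

def epoch_to_iso_sketch(epoch: float) -> str:
    # Format a Unix epoch float back into the same ISO form.
    return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime(ISO_FMT)

assert epoch_to_iso_sketch(iso_to_epoch_sketch('2020-01-01T00:00:00.000000Z')) == '2020-01-01T00:00:00.000000Z'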
Example #2
def main():
    for day in range(31):
        today = now_as_iso(24 * 60 * 60 * day)
        query = "__expiry_ts__:[%s TO %s+1DAY]" % (today, today)
        minutes_params = (
            ("rows", "0"),
            ("facet", "on"),
            ("facet.date", "__expiry_ts__"),
            ("facet.date.start", today),
            ("facet.date.end", today + "+1DAY"),
            ("facet.date.gap", "+1MINUTE"),
            ("facet.mincount", "1"),
        )
        res_minutes = datastore.direct_search("emptyresult", query, args=minutes_params)
        minutes = res_minutes.get("facet_counts", {}).get("facet_dates", {}).get("__expiry_ts__", {})
        for minute, minute_count in minutes.iteritems():
            if minute in ['end', 'gap', 'start']:
                continue

            if minute_count > 0:
                for x in datastore.stream_search('emptyresult', "__expiry_ts__:[%s TO %s+1MINUTE]" % (minute, minute)):
                    try:
                        created = epoch_to_iso(iso_to_epoch(today) - (15 * 24 * 60 * 60))
                        riak_key = x['_yz_rk']

                        path = os.path.join(directory, created[:10]) + '.index'
                        fh = get_filehandle(path)

                        fh.write(riak_key + "\n")
                        fh.flush()

                    except:  # pylint: disable=W0702
                        logger.exception('Unhandled exception:')
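For reference, with a hypothetical now_as_iso() value the format string above expands into a Solr-style date-range filter such as:

query = "__expiry_ts__:[2018-10-17T14:07:27.123456Z TO 2018-10-17T14:07:27.123456Z+1DAY]"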
Example #3
def load_journal(name, delete_queue):
    working_dir = config.core.expiry.journal.directory
    expiry_ttl = config.core.expiry.journal.ttl * 24 * 60 * 60
    log.debug("Expiry will load journal in %s for %s bucket." %
              (working_dir, name))
    while True:
        try:
            for listed_file in os.listdir(working_dir):
                journal_file = os.path.join(working_dir, listed_file)
                if os.path.isfile(journal_file):
                    if journal_file.endswith(name):
                        cur_time = now()
                        day = "%sT00:00:00Z" % listed_file.split(".")[0]
                        file_time = iso_to_epoch(day)
                        if file_time + expiry_ttl <= cur_time:
                            with open(journal_file) as to_delete_journal:
                                count = 0
                                for line in to_delete_journal:
                                    if count % 1000 == 0:
                                        while delete_queue.length(
                                        ) > MAX_QUEUE_LENGTH:
                                            time.sleep(SLEEP_TIME)

                                    line = line.strip()
                                    if line:
                                        delete_queue.push(line)

                                    count += 1

                            os.unlink(journal_file)
        except OSError:
            pass

        time.sleep(SLEEP_TIME)
Example #4
def seconds(t, default=0):
    try:
        try:
            return float(t)
        except ValueError:
            return iso_to_epoch(t)
    except:  # pylint:disable=W0702
        return default
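A few illustrative calls (values are hypothetical) showing the fallback order: float parsing first, then iso_to_epoch, then the default, assuming iso_to_epoch maps the ISO string to its Unix epoch:

seconds("12.5")                            # -> 12.5
seconds("2020-01-01T00:00:00.000000Z")     # -> 1577836800.0 via iso_to_epoch
seconds("not a timestamp", default=-1)     # -> -1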
Example #5
def test_isotime_local():
    local_date = now_as_local()
    local_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}.*')

    assert isinstance(local_date, str)
    assert local_format.match(local_date)
    assert epoch_to_local(local_to_epoch(local_date)) == local_date
    assert local_date == epoch_to_local(iso_to_epoch(epoch_to_iso(local_to_epoch(local_date))))
Example #6
def git_clone_repo(source: Dict[str, Any], previous_update=None) -> List:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)
    ssl_ignore = source.get("ssl_ignore_errors", False)
    ca_cert = source.get("ca_cert")

    git_env = {}
    if ssl_ignore:
        git_env['GIT_SSL_NO_VERIFY'] = '1'

    if ca_cert:
        LOGGER.info(f"A CA certificate has been provided with this source.")
        add_cacert(ca_cert)
        git_env['GIT_SSL_CAINFO'] = certifi.where()

    clone_dir = os.path.join(UPDATE_DIR, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)

    if key:
        LOGGER.info(f"key found for {url}")
        # Save the key to a file
        git_ssh_identity_file = os.path.join(tempfile.gettempdir(), 'id_rsa')
        with open(git_ssh_identity_file, 'w') as key_fh:
            key_fh.write(key)
        os.chmod(git_ssh_identity_file, 0o0400)

        git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file}"
        git_env['GIT_SSH_COMMAND'] = git_ssh_cmd

    repo = Repo.clone_from(url, clone_dir, env=git_env)

    # Check repo last commit
    if previous_update:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)
        for c in repo.iter_commits():
            if c.committed_date < previous_update:
                return []
            break

    sigma_files = []
    for path_in_dir, _, files in os.walk(clone_dir):
        for filename in files:
            filepath = os.path.join(clone_dir, path_in_dir, filename)
            if pattern:
                if re.match(pattern, filepath):
                    sigma_files.append(
                        (filepath, get_sha256_for_file(filepath)))
            else:
                if re.match(R'.*\.yml', filepath):
                    sigma_files.append(
                        (filepath, get_sha256_for_file(filepath)))

    return sigma_files
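A hedged usage sketch; the dictionary keys mirror the ones the function reads, while the name, URL, and timestamp below are placeholders rather than real configuration:

source = {
    'name': 'sigma-rules',                   # placeholder
    'uri': 'https://example.com/rules.git',  # placeholder
    'pattern': r'.*\.yml',                   # optional; falls back to matching .yml files
    'ssl_ignore_errors': False,
}
files = git_clone_repo(source, previous_update='2021-01-01T00:00:00.000000Z')
# -> list of (filepath, sha256) tuples, or [] when no commit is newer than previous_update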
Example #7
def _get_cached_signatures(signature_cache, last_modified, query_hash):
    try:
        if signature_cache.getmtime(query_hash) > iso_to_epoch(last_modified):
            s = signature_cache.get(query_hash)
            return make_file_response(
                s, "al_yara_signatures.yar", len(s), content_type="text/yara"
            )
    except:  # pylint: disable=W0702
        LOGGER.exception('Failed to read cached signatures:')

    return None
Example #8
def update_available(**_):  # pylint: disable=W0613
    """
    Check if updated signatures are available.

    Variables:
    None

    Arguments:
    last_update        => Epoch time of last update.

    Data Block:
    None

    Result example:
    { "update_available" : true }      # If updated rules are available.
    """
    last_update = iso_to_epoch(request.args.get('last_update'))
    last_modified = iso_to_epoch(STORAGE.get_signatures_last_modified())

    return make_api_response({"update_available": last_modified > last_update})
Example #9
    def _get_version_map(self):
        self.engine_map = {}
        engine_list = []
        newest_dat = 0
        oldest_dat = now()

        url = self.cfg.get('BASE_URL') + "stat/engines"
        try:
            r = self.session.get(url=url, timeout=self.timeout)
        except requests.exceptions.Timeout:
            raise Exception("Metadefender service timeout.")

        engines = r.json()

        for engine in engines:
            if self.cfg.get("MD_VERSION") == 4:
                name = self._format_engine_name(engine["eng_name"])
                version = engine['eng_ver']
                def_time = engine['def_time']
                etype = engine['engine_type']
            elif self.cfg.get("MD_VERSION") == 3:
                name = self._format_engine_name(engine["eng_name"]).replace(
                    "scanengine", "")
                version = engine['eng_ver']
                def_time = engine['def_time'].replace(" AM", "").replace(
                    " PM", "").replace("/", "-").replace(" ", "T")
                def_time = def_time[6:10] + "-" + def_time[:5] + def_time[
                    10:] + "Z"
                etype = engine['eng_type']
            else:
                raise Exception("Unknown metadefender version")

            # Compute newest DAT
            dat_epoch = iso_to_epoch(def_time)
            if dat_epoch > newest_dat:
                newest_dat = dat_epoch

            if dat_epoch < oldest_dat and dat_epoch != 0 and etype in [
                    "av", "Bundled engine"
            ]:
                oldest_dat = dat_epoch

            self.engine_map[name] = {
                'version': version,
                'def_time': iso_to_local(def_time)[:19]
            }
            engine_list.append(name)
            engine_list.append(version)
            engine_list.append(def_time)

        self.newest_dat = epoch_to_local(newest_dat)[:19]
        self.oldest_dat = epoch_to_local(oldest_dat)[:19]
        self.dat_hash = hashlib.md5("".join(engine_list)).hexdigest()
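The MetaDefender v3 branch rebuilds an ISO-like timestamp from a US-style def_time string; a worked example of that slicing (input value is illustrative, and the AM/PM marker is simply stripped):

def_time = "10/17/2018 3:14:25 PM"
def_time = def_time.replace(" AM", "").replace(" PM", "").replace("/", "-").replace(" ", "T")
# -> "10-17-2018T3:14:25"
def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
# [6:10] = "2018", [:5] = "10-17", [10:] = "T3:14:25"  ->  "2018-10-17T3:14:25Z"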
Example #10
def update_available(**_):
    """
    Check if updated signatures are available.

    Variables:
    None

    Arguments:
    last_update        => ISO time of last update.
    type               => Signature type to check

    Data Block:
    None

    Result example:
    { "update_available" : true }      # If updated rules are available.
    """
    sig_type = request.args.get('type', '*')
    last_update = iso_to_epoch(
        request.args.get('last_update', '1970-01-01T00:00:00.000000Z'))
    last_modified = iso_to_epoch(STORAGE.get_signature_last_modified(sig_type))

    return make_api_response({"update_available": last_modified > last_update})
Example #11
def git_clone_repo(download_directory: str, source: Dict[str, Any], cur_logger,
                   previous_update=None) -> List[str]:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)

    clone_dir = os.path.join(download_directory, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)
    os.makedirs(clone_dir)

    if key:
        cur_logger.info(f"key found for {url}")
        # Save the key to a file
        git_ssh_identity_file = os.path.join(tempfile.gettempdir(), 'id_rsa')
        if os.path.exists(git_ssh_identity_file):
            os.unlink(git_ssh_identity_file)
        with open(git_ssh_identity_file, 'w') as key_fh:
            key_fh.write(key)
        os.chmod(git_ssh_identity_file, 0o0400)

        git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file}"
        repo = Repo.clone_from(url, clone_dir, env={"GIT_SSH_COMMAND": git_ssh_cmd})
    else:
        repo = Repo.clone_from(url, clone_dir)

    # Check repo last commit
    if previous_update:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)
        for c in repo.iter_commits():
            if c.committed_date < previous_update:
                cur_logger.info("There are no new commits, skipping repository...")
                return []
            break

    if pattern:
        files = [os.path.join(dp, f)
                 for dp, dn, filenames in os.walk(clone_dir)
                 for f in filenames if re.match(pattern, f)]
    else:
        files = glob.glob(os.path.join(clone_dir, '*.yar*'))

    if not files:
        cur_logger.warning(f"Could not find any yara file matching pattern: {pattern or '*.yar*'}")

    return files
Example #12
def git_clone_repo(source: Dict[str, Any],
                   previous_update: int = None,
                   default_pattern: str = "*",
                   logger=None,
                   output_dir: str = None) -> List[Tuple[str, str]]:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)
    username = source.get('username', None)
    password = source.get('password', None)

    ignore_ssl_errors = source.get("ssl_ignore_errors", False)
    ca_cert = source.get("ca_cert")
    proxy = source.get('proxy', None)
    auth = f'{username}:{password}@' if username and password else None

    git_config = None
    git_env = {}

    if ignore_ssl_errors:
        git_env['GIT_SSL_NO_VERIFY'] = '1'

    # Let https requests go through proxy
    if proxy:
        os.environ['https_proxy'] = proxy

    if ca_cert:
        logger.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)
        git_env['GIT_SSL_CAINFO'] = certifi.where()

    if auth:
        logger.info("Credentials provided for auth..")
        url = re.sub(r'^(?P<scheme>https?://)', fr'\g<scheme>{auth}', url)

    clone_dir = os.path.join(output_dir, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)

    with tempfile.NamedTemporaryFile() as git_ssh_identity_file:
        if key:
            logger.info(f"key found for {url}")
            # Save the key to a file
            git_ssh_identity_file.write(key.encode())
            git_ssh_identity_file.seek(0)
            os.chmod(git_ssh_identity_file.name, 0o0400)

            git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file.name}"
            git_env['GIT_SSH_COMMAND'] = git_ssh_cmd

        repo = Repo.clone_from(url,
                               clone_dir,
                               env=git_env,
                               git_config=git_config)

        # Check repo last commit
        if previous_update:
            if isinstance(previous_update, str):
                previous_update = iso_to_epoch(previous_update)
            for c in repo.iter_commits():
                if c.committed_date < previous_update and not FORCE_UPDATE:
                    raise SkipSource()
                break

    # Clear proxy setting
    if proxy:
        del os.environ['https_proxy']

    return filter_downloads(clone_dir, pattern, default_pattern)
Example #13
    def _get_version_map(self, node: str) -> None:
        """
        Get the versions of all engines running on a given node
        :param node: The IP of the MetaDefender node
        :return: None
        """
        newest_dat = 0
        oldest_dat = now()
        engine_list = []
        active_engines = 0
        failed_states = ["removed", "temporary failed", "permanently failed"]
        url = urljoin(node, 'stat/engines')

        try:
            self.log.debug(f"_get_version_map: GET {url}")
            r = self.session.get(url=url, timeout=self.timeout)
            engines = r.json()

            for engine in engines:
                if engine['active'] and engine["state"] not in failed_states:
                    active_engines += 1

                if self.config.get("md_version") == 4:
                    name = self._format_engine_name(engine["eng_name"])
                    version = engine['eng_ver']
                    def_time = engine['def_time']
                    etype = engine['engine_type']
                elif self.config.get("md_version") == 3:
                    name = self._format_engine_name(
                        engine["eng_name"]).replace("scanengine", "")
                    version = engine['eng_ver']
                    def_time = engine['def_time'].replace(" AM", "").replace(
                        " PM", "").replace("/", "-").replace(" ", "T")
                    def_time = def_time[6:10] + "-" + def_time[:5] + def_time[
                        10:] + "Z"
                    etype = engine['eng_type']
                else:
                    raise Exception("Unknown version of MetaDefender")

                # Compute newest DAT
                dat_epoch = iso_to_epoch(def_time)
                if dat_epoch > newest_dat:
                    newest_dat = dat_epoch

                if dat_epoch < oldest_dat and dat_epoch != 0 and etype in [
                        "av", "Bundled engine"
                ]:
                    oldest_dat = dat_epoch

                self.nodes[node]['engine_map'][name] = {
                    'version': version,
                    'def_time': iso_to_local(def_time)[:19]
                }
                engine_list.append(name)
                engine_list.append(version)
                engine_list.append(def_time)

            self.nodes[node]['engine_count'] = active_engines
            self.nodes[node]['newest_dat'] = epoch_to_local(newest_dat)[:19]
            self.nodes[node]['oldest_dat'] = epoch_to_local(oldest_dat)[:19]
            self.nodes[node]['engine_list'] = "".join(engine_list)
        except exceptions.Timeout:
            raise Exception(
                f"Node ({node}) timed out after {self.timeout}s while trying to get engine version map"
            )
        except ConnectionError:
            raise Exception(
                f"Unable to connect to node ({node}) while trying to get engine version map"
            )
Example #14
def url_download(source: Dict[str, Any], previous_update=None) -> List:
    """
    :param source: Source configuration dictionary (name, uri, credentials, pattern, ...)
    :param previous_update: ISO string or epoch of the last successful update
    :return: List of (file path, SHA256) tuples for the downloaded rule files
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    headers = source.get('headers', None)

    LOGGER.info(
        f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}."
    )
    if ca_cert:
        LOGGER.info(f"A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(
                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update:
                # File has not been modified since last update, do nothing
                return []

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                            time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified']:
            # File has not been modified since last update, do nothing
            return []
        elif response.ok:
            if not os.path.exists(UPDATE_DIR):
                os.makedirs(UPDATE_DIR)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(UPDATE_DIR, file_name)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            rules_files = None
            if file_name.endswith('tar.gz'):
                extract_dir = os.path.join(UPDATE_DIR, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)

                rules_files = set()
                for path_in_dir, _, files in os.walk(extract_dir):
                    for filename in files:
                        filepath = os.path.join(extract_dir, path_in_dir,
                                                filename)
                        if pattern:
                            if re.match(pattern, filepath):
                                rules_files.add(filepath)
                        else:
                            rules_files.add(filepath)

            return [(f, get_sha256_for_file(f))
                    for f in rules_files or [file_path]]

    except requests.Timeout:
        # TODO: should we retry?
        pass
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        LOGGER.info(str(e))
        exit()
        # TODO: Should we exit even if one file fails to download? Or should we continue downloading other files?
    finally:
        # Close the requests session
        session.close()
Example #15
def url_download(download_directory: str, source: Dict[str, Any], cur_logger, previous_update=None) -> Optional[str]:
    if os.path.exists(download_directory):
        shutil.rmtree(download_directory)
    os.makedirs(download_directory)

    name = source['name']
    uri = source['uri']
    username = source.get('username', None)
    password = source.get('password', None)
    auth = (username, password) if username and password else None

    headers = source.get('headers', None)

    # Create a requests session
    session = requests.Session()

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update:
                # File has not been modified since last update, do nothing
                cur_logger.info("The file has not been modified since last run, skipping...")
                return

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified']:
            # File has not been modified since last update, do nothing
            cur_logger.info("The file has not been modified since last run, skipping...")
            return
        elif response.ok:
            file_name = os.path.basename(f"{name}.yar")  # TODO: make filename as source name with extension .yar
            file_path = os.path.join(download_directory, file_name)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            # Return file_path
            return file_path
    except requests.Timeout:
        # TODO: should we retry?
        pass
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        cur_logger.info(str(e))
        exit()
        # TODO: Should we exit even if one file fails to download? Or should we continue downloading other files?
    finally:
        # Close the requests session
        session.close()
Example #16
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis

    Variables:
    sha256         => Resource locator (SHA256)

    Arguments (Optional):
    copy_sid    => Mimic the attributes of this SID.
    name        => Name of the file for the submission

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    file_info = STORAGE.file.get(sha256, as_obj=False)
    if not file_info:
        return make_api_response(
            {},
            f"File {sha256} cannot be found on the server therefore it cannot be resubmitted.",
            status_code=404)

    if not Classification.is_accessible(user['classification'],
                                        file_info['classification']):
        return make_api_response(
            "",
            "You are not allowed to re-submit a file that you don't have access to",
            403)

    submit_result = None
    try:
        copy_sid = request.args.get('copy_sid', None)
        name = safe_str(request.args.get('name', sha256))

        if copy_sid:
            submission = STORAGE.submission.get(copy_sid, as_obj=False)
        else:
            submission = None

        if submission:
            if not Classification.is_accessible(user['classification'],
                                                submission['classification']):
                return make_api_response(
                    "",
                    "You are not allowed to re-submit a submission that you don't have access to",
                    403)

            submission_params = submission['params']
            submission_params['classification'] = submission['classification']
            expiry = submission['expiry_ts']

        else:
            submission_params = ui_to_submission_params(
                load_user_settings(user))
            submission_params['classification'] = file_info['classification']
            expiry = file_info['expiry_ts']

        # Calculate original submit time
        if submission_params['ttl'] and expiry:
            submit_time = epoch_to_iso(
                iso_to_epoch(expiry) - submission_params['ttl'] * 24 * 60 * 60)
        else:
            submit_time = None

        if not FILESTORE.exists(sha256):
            return make_api_response(
                {},
                "File %s cannot be found on the server therefore it cannot be resubmitted."
                % sha256,
                status_code=404)

        files = [{'name': name, 'sha256': sha256, 'size': file_info['size']}]

        submission_params['submitter'] = user['uname']
        submission_params['quota_item'] = True
        if 'priority' not in submission_params:
            submission_params['priority'] = 500
        submission_params[
            'description'] = "Resubmit %s for Dynamic Analysis" % name
        if "Dynamic Analysis" not in submission_params['services']['selected']:
            submission_params['services']['selected'].append(
                "Dynamic Analysis")

        try:
            submission_obj = Submission({
                "files": files,
                "params": submission_params,
                "time": submit_time
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        submit_result = SubmissionClient(
            datastore=STORAGE,
            filestore=FILESTORE,
            config=config,
            identify=IDENTIFY).submit(submission_obj)
        submission_received(submission_obj)
        return make_api_response(submit_result.as_primitives())

    except SubmissionException as e:
        return make_api_response("", err=str(e), status_code=400)
    finally:
        if submit_result is None:
            decrement_submission_quota(user)
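The original submit time is recovered by walking the TTL (in days) back from the expiry timestamp; with placeholder values:

expiry = '2021-01-16T00:00:00.000000Z'    # hypothetical expiry_ts
ttl = 15                                  # submission_params['ttl'], in days
submit_time = epoch_to_iso(iso_to_epoch(expiry) - ttl * 24 * 60 * 60)
# -> '2021-01-01T00:00:00.000000Z'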
Example #17
def resubmit_submission_for_analysis(sid, *args, **kwargs):
    """
    Resubmit a submission for analysis with the exact same parameters as before

    Variables:
    sid         => Submission ID to re-submit

    Arguments:
    None

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    submit_result = None
    try:
        submission = STORAGE.submission.get(sid, as_obj=False)

        if submission:
            if not Classification.is_accessible(user['classification'],
                                                submission['classification']):
                return make_api_response(
                    "",
                    "You are not allowed to re-submit a submission that you don't have access to",
                    403)

            submission_params = submission['params']
            submission_params['classification'] = submission['classification']
        else:
            return make_api_response({},
                                     "Submission %s does not exists." % sid,
                                     status_code=404)

        submission_params['submitter'] = user['uname']
        submission_params['quota_item'] = True
        submission_params[
            'description'] = "Resubmit %s for analysis" % ", ".join(
                [x['name'] for x in submission["files"]])

        # Calculate original submit time
        if submission_params['ttl'] and submission['expiry_ts']:
            submit_time = epoch_to_iso(
                iso_to_epoch(submission['expiry_ts']) -
                submission_params['ttl'] * 24 * 60 * 60)
        else:
            submit_time = None

        try:
            submission_obj = Submission({
                "files": submission["files"],
                "metadata": submission['metadata'],
                "params": submission_params,
                "time": submit_time
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        submit_result = SubmissionClient(
            datastore=STORAGE,
            filestore=FILESTORE,
            config=config,
            identify=IDENTIFY).submit(submission_obj)
        submission_received(submission_obj)

        return make_api_response(submit_result.as_primitives())
    except SubmissionException as e:
        return make_api_response("", err=str(e), status_code=400)
    finally:
        if submit_result is None:
            decrement_submission_quota(user)
Example #18
            if now - epoch >= time_diff:
                user = key.split(" ")[0]
                client.hdel('c-tracker-quota', key)
                client.decr('quota-{user}'.format(user=user))
                logger.warning(
                    "API request: \"{key}\" was removed from ongoing "
                    "request because it reached the timeout.".format(key=key))
            else:
                logger.debug(
                    "{key} is ok. [{now} - {value} < {time_diff}]".format(
                        key=key, now=now, value=epoch, time_diff=time_diff))
    # Submission Quota tracking
    for key in persist.keys('submissions-*'):
        data = persist.hgetall(key)
        for sid, t in data.iteritems():
            epoch = isotime.iso_to_epoch(json.loads(t))
            now = time.time()
            if now - epoch >= quota_time_diff:
                user = key.split('-')[1]
                logger.warning(
                    'Quota item "{sid}" for user "{user}" was removed'.format(
                        sid=sid, user=user))
                persist.hdel(key, sid)

    # Web sessions tracking
    sessions = client.hgetall('flask_sessions')
    if sessions:
        for k, v in sessions.iteritems():
            v = json.loads(v)
            now = time.time()
            expire_at = v.get('expire_at', 0)
Example #19
def url_update(test_func=test_file) -> None:
    """
    Using an update configuration file as an input, which contains a list of sources, download all the file(s) which
    have been modified since the last update.
    """
    update_config = {}
    # Load configuration
    if UPDATE_CONFIGURATION_PATH and os.path.exists(UPDATE_CONFIGURATION_PATH):
        with open(UPDATE_CONFIGURATION_PATH, 'r') as yml_fh:
            update_config = yaml.safe_load(yml_fh)
    else:
        LOGGER.warning("Could not find update configuration file.")
        exit(1)

    # Cleanup output path
    if os.path.exists(UPDATE_OUTPUT_PATH):
        if os.path.isdir(UPDATE_OUTPUT_PATH):
            shutil.rmtree(UPDATE_OUTPUT_PATH)
        else:
            os.unlink(UPDATE_OUTPUT_PATH)
    os.makedirs(UPDATE_OUTPUT_PATH)

    # Get sources
    sources = update_config.get('sources', None)
    # Exit if no update sources given
    if not sources:
        exit()

    # Parse updater configuration
    previous_update = update_config.get('previous_update', None)
    previous_hash = update_config.get('previous_hash', None) or {}
    if previous_hash:
        previous_hash = json.loads(previous_hash)
    if isinstance(previous_update, str):
        previous_update = iso_to_epoch(previous_update)

    # Create a requests session
    session = requests.Session()

    files_sha256 = {}

    # Go through each source and download file
    for source in sources:
        uri = source['uri']
        name = source['name']

        if not uri or not name:
            LOGGER.warning(f"Invalid source: {source}")
            continue

        LOGGER.info(f"Downloading file '{name}' from uri '{uri}' ...")

        username = source.get('username', None)
        password = source.get('password', None)
        auth = (username, password) if username and password else None
        ca_cert = source.get('ca_cert', None)
        ignore_ssl_errors = source.get('ssl_ignore_errors', False)

        headers = source.get('headers', None)

        if ca_cert:
            # Add certificate to requests
            cafile = certifi.where()
            with open(cafile, 'a') as ca_editor:
                ca_editor.write(f"\n{ca_cert}")

        session.verify = not ignore_ssl_errors

        try:
            # Check the response header for the last modified date
            response = session.head(uri, auth=auth, headers=headers)
            last_modified = response.headers.get('Last-Modified', None)
            if last_modified:
                # Convert the last modified time to epoch
                last_modified = time.mktime(
                    time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

                # Compare the last modified time with the last updated time
                if update_config.get(
                        'previous_update',
                        None) and last_modified <= previous_update:
                    # File has not been modified since last update, do nothing
                    LOGGER.info(
                        "File has not changed since last time, Skipping...")
                    continue

            if update_config.get('previous_update', None):
                previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                                time.gmtime(previous_update))
                if headers:
                    headers['If-Modified-Since'] = previous_update
                else:
                    headers = {
                        'If-Modified-Since': previous_update,
                    }

            response = session.get(uri, auth=auth, headers=headers)

            # Check the response code
            if response.status_code == requests.codes['not_modified']:
                # File has not been modified since last update, do nothing
                LOGGER.info(
                    "File has not changed since last time, Skipping...")
                continue
            elif response.ok:
                file_path = os.path.join(UPDATE_OUTPUT_PATH, name)
                with open(file_path, 'wb') as f:
                    f.write(response.content)

                if not test_func(file_path):
                    os.unlink(file_path)
                    LOGGER.warning(
                        f"The downloaded file was invalid. It will not be part of this update..."
                    )
                    continue

                # Append the SHA256 of the file to a list of downloaded files
                sha256 = get_sha256_for_file(file_path)
                if previous_hash.get(name, None) != sha256:
                    files_sha256[name] = sha256
                else:
                    LOGGER.info(
                        "File as the same hash as last time. Skipping...")

                LOGGER.info("File successfully downloaded!")
        except requests.Timeout:
            LOGGER.warning(
                f"Cannot find the file for source {name} with url {uri} - (Timeout)"
            )
            continue
        except Exception as e:
            # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
            LOGGER.warning(f"Source {name} failed with error: {str(e)}")

    if files_sha256:
        new_hash = deepcopy(previous_hash)
        new_hash.update(files_sha256)

        # Check if the new update hash matches the previous update hash
        if new_hash == previous_hash:
            # Update file(s) not changed, delete the downloaded files and exit
            shutil.rmtree(UPDATE_OUTPUT_PATH, ignore_errors=True)
            exit()

        # Create the response yaml
        with open(os.path.join(UPDATE_OUTPUT_PATH, 'response.yaml'),
                  'w') as yml_fh:
            yaml.safe_dump(dict(hash=json.dumps(new_hash), ), yml_fh)

        LOGGER.info("Service update file(s) successfully downloaded")

    # Close the requests session
    session.close()
Example #20
def test_isotime_epoch():
    epoch_date = now(200)

    assert epoch_date == local_to_epoch(epoch_to_local(epoch_date))
    assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date))
    assert isinstance(epoch_date, float)
Example #21
def url_download(source: Dict[str, Any],
                 previous_update: int = None,
                 logger=None,
                 output_dir: str = None) -> List[Tuple[str, str]]:
    """
    :param source: Source configuration dictionary (name, uri, credentials, pattern, ...)
    :param previous_update: ISO string or epoch of the last successful update
    :return: List of (file path, SHA256) tuples for the downloaded files
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    proxy = source.get('proxy', None)
    headers_list = source.get('headers', [])
    headers = {header['name']: header['value'] for header in headers_list}

    logger.info(
        f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}."
    )
    if ca_cert:
        logger.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    # Let https requests go through proxy
    if proxy:
        os.environ['https_proxy'] = proxy

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(
                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update and not FORCE_UPDATE:
                # File has not been modified since last update, do nothing
                raise SkipSource()

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                            time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes[
                'not_modified'] and not FORCE_UPDATE:
            # File has not been modified since last update, do nothing
            raise SkipSource()
        elif response.ok:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(output_dir, file_name)
            with open(file_path, 'wb') as f:
                for content in response.iter_content(BLOCK_SIZE):
                    f.write(content)

            # Clear proxy setting
            if proxy:
                del os.environ['https_proxy']

            if file_name.endswith('tar.gz') or file_name.endswith('zip'):
                extract_dir = os.path.join(output_dir, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)

                return filter_downloads(extract_dir, pattern)
            else:
                return [(file_path, get_sha256_for_file(file_path))]
        else:
            logger.warning(f"Download not successful: {response.content}")
            return []

    except SkipSource:
        # Raise to calling function for handling
        raise
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        logger.warning(str(e))
        exit()
    finally:
        # Close the requests session
        session.close()