    def matchPackageNames(self, pkgspecs):
        matched = []
        exactmatch = []
        unmatched = None
        for sack in self.sacks.values():
            if hasattr(sack, "matchPackageNames"):
                e, m, u = [], [], []
                try:
                    e, m, u = sack.matchPackageNames(pkgspecs)
                except PackageSackError:
                    continue

                exactmatch.extend(e)
                matched.extend(m)
                if unmatched is None:
                    unmatched = set(u)
                else:
                    unmatched = unmatched.intersection(set(u))

        matched = misc.unique(matched)
        exactmatch = misc.unique(exactmatch)
        if unmatched is None:
            unmatched = []
        else:
            unmatched = list(unmatched)
        return exactmatch, matched, unmatched
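Every example on this page funnels its results through misc.unique from yum's misc helper module. As a point of reference, here is a minimal sketch of what such a helper presumably does, assuming the elements are hashable (the real implementation may also cope with unhashable items and may not preserve order):

def unique(items):
    # Return one copy of each element of `items` (sketch only; not yum's actual code).
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result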
Example #3
def parsePackages(pkgs, usercommands, casematch=0,
                  unique='repo-epoch-name-version-release-arch'):
    """matches up the user request versus a pkg list:
       for installs/updates available pkgs should be the 'others list'
       for removes it should be the installed list of pkgs
       takes an optional casematch option to determine if case should be matched
       exactly. Defaults to not matching."""

    pkgdict = buildPkgRefDict(pkgs, bool(casematch))
    exactmatch = []
    matched = []
    unmatched = []
    for command in usercommands:
        if not casematch:
            command = command.lower()
        if command in pkgdict:
            exactmatch.extend(pkgdict[command])
            del pkgdict[command]
        else:
            # anything we couldn't find a match for
            # could mean it's not there, could mean it's a wildcard
            if misc.re_glob(command):
                trylist = pkgdict.keys()
                # command and pkgdict are already lowered if not casematch
                # so case sensitive is always fine
                restring = fnmatch.translate(command)
                regex = re.compile(restring)
                foundit = 0
                for item in trylist:
                    if regex.match(item):
                        matched.extend(pkgdict[item])
                        del pkgdict[item]
                        foundit = 1

                if not foundit:
                    unmatched.append(command)

            else:
                unmatched.append(command)

    unmatched = misc.unique(unmatched)
    if unique == 'repo-epoch-name-version-release-arch': # pkg.__hash__
        matched    = misc.unique(matched)
        exactmatch = misc.unique(exactmatch)
    elif unique == 'repo-pkgkey': # So we get all pkg entries from a repo
        def pkgunique(pkgs):
            u = {}
            for pkg in pkgs:
                mark = "%s%s" % (pkg.repo.id, pkg.pkgKey)
                u[mark] = pkg
            return u.values()
        matched    = pkgunique(matched)
        exactmatch = pkgunique(exactmatch)
    else:
        raise ValueError, "Bad value for unique: %s" % unique
    return exactmatch, matched, unmatched
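The wildcard fallback in parsePackages uses only the standard library: fnmatch.translate turns a shell-style glob into a regular expression, which is then matched against the keys of pkgdict. A self-contained illustration of just that step (the package names below are made up):

import fnmatch
import re

pkg_names = ['kernel', 'kernel-devel', 'bash', 'coreutils']
command = 'kernel*'                               # user-supplied wildcard
regex = re.compile(fnmatch.translate(command))
hits = [name for name in pkg_names if regex.match(name)]
# hits == ['kernel', 'kernel-devel']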
Example #4
def _do_github_search_query(search_query):
    logger = logging.getLogger("github_search")

    def remove_hrefs(s):
        s = re.sub(r"<a href.+?>", "", s)
        s = s.replace("</a>", "")
        return s

    mutations = [
        lambda x: [x], lambda x: [remove_hrefs(x)],
        lambda x: remove_hrefs(x).split("\n")
    ]
    mutants = misc.unique(
        misc.flatten_list([m(search_query.query) for m in mutations]))
    for query_str in mutants:
        if not query_str:
            continue
        logger.info("trying {0}".format(query_str))
        code, answer = net.github_search(query_str)
        if code == net.CODE_TIMEOUT:
            logger.info("sleeping...")
            mutants.append(query_str)  # Try again.
            time.sleep(60)
        elif code == net.CODE_VALIDATION:
            logger.info("got 422: " + answer)
            search_query.state = models.GithubSearchQuery.ERROR
            db.global_session.commit()
        elif code == net.CODE_OK:
            if len(answer["items"]) > 5:
                answer["items"] = [
                    item for item in answer["items"] if _messages_match(
                        search_query.query, item["commit"]["message"])
                ]
            hash_strings = misc.unique(
                [item["sha"] for item in answer["items"]])
            logger.info("got results: {0}".format(hash_strings))
            queries = []
            if hash_strings:
                search_query.state = models.GithubSearchQuery.NON_EMPTY
                for h in hash_strings:
                    queries += [
                        db.InsertQuery(models.CommitHash, hash=h),
                        db.ConnectQuery(models.query_hash_table,
                                        search_query.query, h)
                    ]
                    queries += [
                        db.ConnectQuery(models.hash_url_table, h, url.url)
                        for url in search_query.urls
                    ]
                db.process_queries(queries)
                db.global_session.commit()  # Commit state update.
                return
            search_query.state = models.GithubSearchQuery.EMPTY
            db.global_session.commit()  # Commit state update.
        else:
            raise "got something unexpected: {0} {1}".format(code, answer)
Example #5
    def matchPackageNames(self, pkgspecs):
        """take a list strings and match the packages in the sack against it
           this will match against:
           name
           name.arch
           name-ver-rel.arch
           name-ver
           name-ver-rel
           epoch:name-ver-rel.arch
           name-epoch:ver-rel.arch
           
           return [exact matches], [glob matches], [unmatched search terms]
           """
        # Setup match() for the search we're doing
        matched = []
        exactmatch = []
        unmatched = set(pkgspecs)

        specs = {}
        for p in pkgspecs:
            if misc.re_glob(p):
                restring = fnmatch.translate(p)
                specs[p] = re.compile(restring)
            else:
                specs[p] = p

        #  We don't use simplePkgList() here because that loads all of the
        # rpmdb if we are, e.g., doing a "remove PackageKit".
        pkgs = self.returnPackages(patterns=unmatched)
        for pkgtup in [pkg.pkgtup for pkg in pkgs]:
            (n,a,e,v,r) = pkgtup
            names = set((
                n, 
                '%s.%s' % (n, a),
                '%s-%s-%s.%s' % (n, v, r, a),
                '%s-%s' % (n, v),
                '%s-%s-%s' % (n, v, r),
                '%s:%s-%s-%s.%s' % (e, n, v, r, a),
                '%s-%s:%s-%s.%s' % (n, e, v, r, a),
                ))
                
            for (term,query) in specs.items():
                if term == query:
                    if query in names:
                        exactmatch.append(self.searchPkgTuple(pkgtup)[0])
                        unmatched.discard(term)
                else:
                    for n in names:
                        if query.match(n):
                            matched.append(self.searchPkgTuple(pkgtup)[0])
                            unmatched.discard(term)
        return misc.unique(exactmatch), misc.unique(matched), list(unmatched)
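The core of matchPackageNames is building every spelling a user might type for a single pkgtup and then checking exact specs by set membership and glob specs by regex. A standalone sketch of that core, without the sack plumbing (the package tuple below is invented for the example):

import fnmatch
import re

def spellings(pkgtup):
    n, a, e, v, r = pkgtup
    return set((
        n,
        '%s.%s' % (n, a),
        '%s-%s-%s.%s' % (n, v, r, a),
        '%s-%s' % (n, v),
        '%s-%s-%s' % (n, v, r),
        '%s:%s-%s-%s.%s' % (e, n, v, r, a),
        '%s-%s:%s-%s.%s' % (n, e, v, r, a),
    ))

names = spellings(('bash', 'x86_64', '0', '5.1.8', '2.fc35'))
assert 'bash.x86_64' in names                       # exact spec
regex = re.compile(fnmatch.translate('bash-5*'))
assert any(regex.match(name) for name in names)     # glob spec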
Example #7
def _get_queries_for_hash_url(url_string, hashes):
    hashes = misc.unique(hashes)
    if config.IGNORE_SHORT_HASHES:
        hashes = [h for h in hashes if len(h) == 40]
    queries = \
        [db.InsertQuery(models.CommitHash, hash=h) for h in hashes] +\
        [db.ConnectQuery(models.hash_url_table, h, url_string) for h in hashes]
    return queries
Example #8
    def search_tags(self, tagname):
        res = {}
        for ptd in self.db_objs.values():
            for (name, taglist) in ptd.search_tags(tagname).items():
                if name not in res:
                    res[name] = []
                res[name].extend(taglist)
        out = {}
        for (name, taglist) in res.items():
            out[name] = misc.unique(taglist)
        return out
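The merge in search_tags is a common dict-of-lists aggregation: collect tag lists per name from several sources, then de-duplicate each list. A tiny standalone example of the same shape (sorted(set(...)) stands in for misc.unique, and the sample data is invented):

partial_results = [
    {'vim': ['editor'], 'bash': ['shell']},
    {'vim': ['editor', 'console']},
]

merged = {}
for result in partial_results:
    for name, taglist in result.items():
        merged.setdefault(name, []).extend(taglist)

deduped = dict((name, sorted(set(tags))) for name, tags in merged.items())
# deduped == {'vim': ['console', 'editor'], 'bash': ['shell']}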
Example #9
def process_queries(all_queries):
    query_classes = [
            # Order of classes here is important.
            InsertQuery,
            ConnectQuery,
            # UpdateQuery,
            UpdateTagQuery,
    ]
    for C in query_classes:
        queries = misc.unique([q for q in all_queries if q.__class__ == C])
        C.process_func(queries)
Example #11
def get_subdirs_recursively(dir):
    """
    get all subdirectories recursively in the given directory
    """
    all_files = []
    os.path.walk(dir, match_files_recursively_helper, all_files)

    matches = misc.unique([f for f in all_files if os.path.isdir(f)])

    matches.sort(lambda l, o: cmp(l.upper(), o.upper()))

    return matches
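Note that os.path.walk and the comparator form of list.sort used here exist only on Python 2; both were removed in Python 3. A rough Python 3 equivalent of the same scan, for reference (os.walk replaces os.path.walk, and a key function replaces cmp):

import os

def get_subdirs_recursively(top):
    # Collect every subdirectory under `top`, sorted case-insensitively.
    matches = set()
    for dirpath, dirnames, _filenames in os.walk(top):
        for d in dirnames:
            matches.add(os.path.join(dirpath, d))
    return sorted(matches, key=str.upper)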
Example #12
def get_subdirs_recursively(dir, follow_symlinks=False):
    """
    get all subdirectories recursively in the given directory
    """
    all_files = []
    os.path.walk(dir, match_files_recursively_helper, (all_files, [], follow_symlinks))

    matches = misc.unique([f for f in all_files if os.path.isdir(f) ])

    matches.sort(lambda l, o: cmp(l.upper(), o.upper()))

    return matches
Example #13
def match_files_recursively(dir, suffix_list):
    """
    get all files matching suffix_list in the dir and in its subdirectories
    """
    all_files = []
    os.path.walk(dir, match_files_recursively_helper, all_files)

    matches = misc.unique(
        [f for f in all_files if match_suffix(f, suffix_list)])

    matches.sort(lambda l, o: cmp(l.upper(), o.upper()))

    return matches
Example #14
    def file_func(file_obj):
        name = basename(file_obj["path"])
        if local:
            raw = fs.read_file(file_obj["path"])
        else:
            raw = net.get_raw_resource(file_obj["download_url"],
                                       auth=net.github_auth)
        url_strings = misc.unique(re.findall(commit_re, raw))
        queries = []
        if url_strings:
            # Figure out the tag list.
            tags = ["vulndb"]
            for k, v in tag_dict.iteritems():
                if k in file_obj["path"]:
                    tags.append(v)
                    break

            # Insert CVEs/NonCVEs and connect them to urls.
            cve_strings = misc.unique(re.findall(cve_re, raw))
            if not cve_strings:
                hash_id = models.NonCVE.hash_id_for_urls(url_strings)
                queries = [db.InsertQuery(models.NonCVE, hash_id=hash_id)]
                queries += _get_queries_for_noncve_url(hash_id, url_strings,
                                                       tags)
            else:
                # Surprisingly, there are some CVEs in this db which are
                # marked as reserved in the NIST feed. We need to add them here.
                queries = [
                    db.InsertQuery(models.CVE, cve_string=cve_string)
                    for cve_string in cve_strings
                ]
                for cve_string in cve_strings:
                    queries += _get_queries_for_cve_url(
                        cve_string, url_strings, tags)
        bar.update()
        return ([], queries)
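commit_re and cve_re are defined elsewhere in that project and are not shown in this snippet; all the code needs is patterns whose re.findall results are the commit URLs and CVE identifiers. Purely as an illustration, hypothetical stand-ins of roughly the right shape might look like:

import re

# Hypothetical stand-ins for the project's commit_re and cve_re.
commit_re = re.compile(r'https?://github\.com/\S+/commit/[0-9a-f]{7,40}')
cve_re = re.compile(r'CVE-\d{4}-\d{4,}')

raw = 'fixes CVE-2014-0160, see https://github.com/openssl/openssl/commit/deadbeef0123456'
print(re.findall(cve_re, raw))      # ['CVE-2014-0160']
print(re.findall(commit_re, raw))   # ['https://github.com/.../commit/deadbeef0123456']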
Example #15
def match_files_recursively(dir, suffix_list, skip_password=False, follow_symlinks=False):
    """
    get all files matching suffix_list in the dir and in its subdirectories
    """
    all_files = []
    if skip_password:
        os.path.walk(dir, match_files_recursively_skip_protected, (all_files, [], follow_symlinks))
    else:
        os.path.walk(dir, match_files_recursively_helper, (all_files, [], follow_symlinks))

    matches = misc.unique([f for f in all_files if match_suffix(f, suffix_list) ])

    matches.sort(lambda l, o: cmp(l.upper(), o.upper()))

    return matches
Example #16
def check_hashes_for_urls_like(template):
    urls = db.global_session.query(models.URL).all()
    urls = [url for url in urls if template in url.url]
    all_hashes = misc.unique(
        [hash.hash for url in urls for hash in url.hashes])

    good = []
    bad = []
    for h in all_hashes:
        matches = filter(lambda x: x.startswith(h), get_found_hashes())
        if matches:
            # We want the longer hash.
            good.append(matches[0])
        else:
            bad.append(h)

    print "\n".join(good)
Example #17
def crawl_github_repo(user, repo, file_callback, dir_callback, local):
    # TODO: add a non-parallel version.
    # TODO: adapt to the new db format. Maybe make a second queue for collecting
    # return data from workers.
    task_queue = Queue()
    results_queue = Queue()
    if local:
        path = misc.repo_path(user, repo)
        crawl_list = fs.crawl_dir(path)
    else:
        base_url = net.get_api_url(user, repo)
        crawl_list = net.get_json_resource(base_url, auth=net.github_auth)

    def worker(task_queue, results_queue):
        while True:
            item = task_queue.get()
            crawl_list = []
            if item["type"] == "file":
                crawl_list, queries = file_callback(item)
                results_queue.put(queries)
            elif dir_callback(item):
                if local:
                    crawl_list = fs.crawl_dir(item["path"])
                else:
                    crawl_list = net.get_json_resource(item["url"],
                                                       auth=net.github_auth)
            for item in crawl_list:
                task_queue.put(item)
            task_queue.task_done()

    for item in crawl_list:
        task_queue.put(item)
    for i in xrange(0, config.THREADS_COUNT):
        t = Thread(target=worker, args=(task_queue, results_queue))
        # TODO: can't make the threads stop properly when an event is fired.
        # So for now we'll do it with daemon threads, which will get blocked
        # after the queue gets empty.
        t.daemon = True
        t.start()
    task_queue.join()
    return misc.unique(misc.flatten_list(results_queue.queue))
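The threading shape in crawl_github_repo (daemon workers pulling from a Queue, pushing results onto a second Queue, and the main thread blocking on task_queue.join()) uses only the standard library. A minimal, self-contained sketch of that pattern, with trivial stand-in work per item:

try:                                  # Python 2 / Python 3 module rename
    from Queue import Queue
except ImportError:
    from queue import Queue
from threading import Thread

task_queue = Queue()
results_queue = Queue()

def worker():
    while True:
        item = task_queue.get()
        results_queue.put(item * item)     # trivial stand-in for real work
        task_queue.task_done()

for item in range(10):
    task_queue.put(item)
for _ in range(4):
    t = Thread(target=worker)
    t.daemon = True                        # dies with the main thread
    t.start()
task_queue.join()                          # blocks until every item is task_done()
results = sorted(results_queue.queue)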
Example #18
def match_files_recursively(dir,
                            suffix_list,
                            skip_password=False,
                            follow_symlinks=False):
    """
    get all files matching suffix_list in the dir and in its subdirectories
    """
    all_files = []
    if skip_password:
        os.path.walk(dir, match_files_recursively_skip_protected,
                     (all_files, [], follow_symlinks))
    else:
        os.path.walk(dir, match_files_recursively_helper,
                     (all_files, [], follow_symlinks))

    matches = misc.unique(
        [f for f in all_files if match_suffix(f, suffix_list)])

    matches.sort(lambda l, o: cmp(l.upper(), o.upper()))

    return matches
Example #19
    def searchPrco(self, name, prcotype):
        self._checkIndexes(failure='build')
        prcodict = getattr(self, prcotype)
        (n,f,(e,v,r)) = misc.string_to_prco_tuple(name)
        
        basic_results = []
        results = []
        if n in prcodict:
            basic_results.extend(prcodict[n])

        for po in basic_results:
            if po.checkPrco(prcotype, (n, f, (e,v,r))):
                results.append(po)

        if prcotype != "provides":
            return results
            
        if not misc.re_filename(n):
            return results

        results.extend(self.searchFiles(n))
        return misc.unique(results)
Example #20
    def searchPrco(self, name, prcotype):
        self._checkIndexes(failure='build')
        prcodict = getattr(self, prcotype)
        (n, f, (e, v, r)) = misc.string_to_prco_tuple(name)

        basic_results = []
        results = []
        if n in prcodict:
            basic_results.extend(prcodict[n])

        for po in basic_results:
            if po.checkPrco(prcotype, (n, f, (e, v, r))):
                results.append(po)

        if prcotype != "provides":
            return results

        if not misc.re_filename(n):
            return results

        results.extend(self.searchFiles(n))
        return misc.unique(results)
Example #21
def search_not_found_hashes():
    not_found = get_not_found_hashes()
    try:
        cached = get_cached_commits()
    except:
        cached = {}

    not_found = [x for x in not_found if x and cached.get(x) is None]
    if not not_found:
        print "No hashes to search!"
        return
    bar = misc.KnownLengthBar(maxval=len(not_found), parallel=False)
    start_time = bar.start_time
    for i, h in enumerate(not_found):
        bar.update(1)
        try:
            code, reply = net.github_search(h)
        except Exception as e:
            print "Got exception: {0} for hash: {1}".format(e, h)
            not_found.append(h)
            continue
        if code == net.CODE_OK:
            cached[h] = misc.unique(
                [x["repository"]["full_name"] for x in reply["items"]])
        elif code == net.CODE_TIMEOUT:
            not_found.append(h)
            with open(config.HASH_CACHE_FILE, "w") as f:
                json.dump(cached, f, indent=2)
            # bar.finish()
            time.sleep(60)
            bar = misc.KnownLengthBar(maxval=len(not_found), parallel=False)
            bar.start_time = start_time
            bar.update(i)
        else:
            print "Got code {0} for hash: {1}".format(code, h)

    with open(config.HASH_CACHE_FILE, "w") as f:
        json.dump(cached, f, indent=2)
Example #22
def crawl_android_security_bulletin(parallel=True):
    raw = net.get_raw_resource("https://source.android.com/security/bulletin/")
    urls = re.findall(
        r"https?://source.android.com/security/bulletin/[0-9-]{10}", raw)
    urls = misc.unique(urls)
    bar = misc.KnownLengthBar(maxval=len(urls), parallel=parallel)

    def worker(url_string):
        raw = net.get_raw_resource(url_string)
        results = re.findall(
            r"<td>(CVE[0-9-]+)</td>\s+<td><a href=\"(\S+?)\">", raw, re.DOTALL)
        results = [r for r in results if _should_keep_url(r[1])]
        queries = [
            db.InsertQuery(models.CVE, cve_string=r[0]) for r in results
        ]
        queries += [db.InsertQuery(models.URL, url=r[1]) for r in results]
        queries += [
            db.ConnectQuery(models.cve_url_table, r[0], r[1]) for r in results
        ]
        bar.update()
        return queries

    return _process_queries_from_workers(worker, urls, parallel, bar)
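The bulletin crawl is just two re.findall passes over raw HTML. A small standalone illustration of the second pass, run against a fabricated fragment shaped like the bulletin tables:

import re

raw = '''
<td>CVE-2016-3861</td>
<td><a href="https://android.googlesource.com/platform/foo/+/abc123">patch</a></td>
'''

results = re.findall(
    r'<td>(CVE[0-9-]+)</td>\s+<td><a href="(\S+?)">', raw, re.DOTALL)
# results == [('CVE-2016-3861',
#              'https://android.googlesource.com/platform/foo/+/abc123')]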
Example #23
    def __add__(self, other):
        """Sum two flights: assign all positions of both flights to a single
           flight, but only if they are consecutive."""
        points = np.vstack((self.UTM, other.UTM))
        return Flight(misc.unique(points))
Example #24
def _process_queries_from_workers(worker_func, sequence, parallel, bar=None):
    results = misc.map_func(worker_func, sequence, parallel)
    if bar:
        bar.finish()
    queries = misc.unique(misc.flatten_list(results))
    return db.process_queries(queries)
Example #25
def dump_commits(parallel=True):
    black_list_res = [
        # This list is used to temporarily disable some of the urls to not waste
        # time on processing them.

        # re.compile(r".+git.kernel.org.+"),
        # re.compile(r".+github.com.+"),
        # re.compile(r".+svn.apache.org.+"),
        # re.compile(".+github.+(linux).+"),
    ]

    def black_list(url_string):
        for black_list_re in black_list_res:
            if re.match(black_list_re, url_string):
                return True
        return False

    def extract(url_string):
        extractors = [
            extract_from_github_commit,
            extract_from_github_issue,
            extract_from_github_pull,
            extract_from_apache_svn,
            extract_from_commit_urls,
            extract_from_googlesource,
            extract_from_moodle,
            extract_from_chromium_codereview,
            # TODO: extract from git kernel org
        ]
        queries = []
        for e in extractors:
            queries = e(url_string)
            if queries:
                break
        bar.update()
        return queries

    print "Parsing URLs"
    url_strings = [
        x.url for x in db.global_session.query(models.URL).all()
        if not black_list(x.url) and not x.hashes and not x.queries
    ]
    bar = misc.KnownLengthBar(maxval=len(url_strings), parallel=parallel)
    queries = misc.map_func(extract, url_strings, parallel)
    queries = misc.unique(misc.flatten_list(queries))
    print "Parsing URLs done"

    if not queries:
        print "No new hashes :("
    else:
        print "Storing results"
        db.process_queries(queries)
        print "Storing results done"

    print "Writing bad urls to {0}".format(config.BAD_URLS_FILE)
    good_urls_set = set([
        q.right_unique_value for q in queries if q.__class__ == db.ConnectQuery
        and q.table in [models.hash_url_table, models.query_url_table]
    ])
    bad_urls = [x for x in url_strings if x not in good_urls_set]

    with open(config.BAD_URLS_FILE, "w") as f:
        f.write("\n".join(sorted(bad_urls)))
Example #26
#!/usr/bin/python

assert __name__ == '__main__'

import argparse
from datetime import datetime as DT
from misc import KEYBOOKS, cdar, unique, car, FORMAT0, FORMAT1, KEEP

parser = argparse.ArgumentParser(
    description='catenate a processed version of an exported libre file.')
parser.add_argument('--test', action='store_true')
parser.add_argument('filename')
args = parser.parse_args()
if True:
    target = args.filename
    with open(target) as fd:
        _owner = fd.readline().strip()
        _keys = fd.readline().strip().split('\t')
        _agree = lambda book: _keys == map(car, book)
        keys = map(cdar, unique(filter(_agree, KEYBOOKS)))
        for line in fd.readlines():
            vals = line.strip().split('\t')
            items = zip(keys, vals)
            items.sort()
            stamp = DT.strptime(dict(items)['Time'], FORMAT0).strftime(FORMAT1)
            print(stamp + ':libre:' + repr(filter(KEEP, items)))