def load_all(dataset_dir, doc_ids=None, filter_types=None, filter_senses=None):
    """Load the whole CoNLL16st dataset stored in `dataset_dir`.

    The parses are loaded first (restricted by `doc_ids`); the sorted keys of
    the loaded parses then replace `doc_ids` for every later loader call.
    Gold relations are requested with senses. If that result is empty, they
    are requested again without senses, and that fallback is used only to
    build the relation parts (relation types and senses always come from the
    with-senses result).

    Returns the tuple:
        (doc_ids, words, word_metas, pos_tags, dependencies, parsetrees,
         rel_ids, rel_parts, rel_types, rel_senses, relations_gold)
    """
    # provided files, as returned by the loaders
    parsed_docs = load_parses(dataset_dir, doc_ids=doc_ids)
    doc_ids = sorted(parsed_docs.keys())
    raw_texts = load_raws(dataset_dir, doc_ids=doc_ids)

    # both gold-relation requests share everything except `with_senses`
    gold_kwargs = dict(
        doc_ids=doc_ids,
        filter_types=filter_types,
        filter_senses=filter_senses,
    )
    relations_gold = load_relations_gold(dataset_dir, with_senses=True, **gold_kwargs)
    gold_for_parts = relations_gold or load_relations_gold(
        dataset_dir, with_senses=False, **gold_kwargs)

    # per document id and token id
    words = get_words(parsed_docs)
    pos_tags = get_pos_tags(parsed_docs)
    word_metas = get_word_metas(parsed_docs, raw_texts)

    # per document id and token id pair
    dependencies = get_dependencies(parsed_docs)

    # per document id
    parsetrees = get_parsetrees(parsed_docs)

    # per relation id
    rel_parts = get_rel_parts(gold_for_parts)
    rel_ids = sorted(rel_parts.keys())
    rel_types = get_rel_types(relations_gold)
    rel_senses = get_rel_senses(relations_gold)

    # extra fields; the return value is ignored, so this presumably updates
    # word_metas in place -- TODO confirm against add_relation_tags
    add_relation_tags(word_metas, rel_types, rel_senses)

    return (
        doc_ids, words, word_metas, pos_tags, dependencies, parsetrees,
        rel_ids, rel_parts, rel_types, rel_senses, relations_gold,
    )
Beispiel #2
0
def populate():
    """Fetch the PyPI package list and record every new package in the shelf.

    The package names come from the XML-RPC `list_packages()` call. For each
    name that is not yet a key of ``d['packages']``, the package is ingested
    with `ingest_package`, its dependencies are looked up, and the result is
    stored under ``result['name']`` with an added ``'dependencies'`` entry.
    The shelf is synced after every stored package so that an interrupted run
    keeps what it has already collected.

    Any exception raised while resolving or reporting the dependencies of a
    package is printed and that package is skipped (best effort); exceptions
    from `ingest_package` itself are not caught here.

    All messages are printed as one pre-formatted string inside parentheses,
    which produces the same output under Python 2's print statement and is
    also valid syntax for the Python 3 print function.
    """
    print("Getting the list of all packages.")
    client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
    packages = client.list_packages()
    print("Ridiculous!  Found %i packages." % len(packages))

    print("Ingesting %i packages." % len(packages))
    # `cm` and `fname` are defined outside this block; `cm` presumably makes
    # the shelf usable in a `with` statement -- TODO confirm.
    with cm(shelve.open(fname, writeback=True)) as d:
        d['packages'] = d.get('packages', {})

        for package in packages:
            if package in d['packages']:
                print("%s %s" % ("Skipping          ", package))
                continue

            result = ingest_package(package)

            try:
                name = result['name']
                requires = dependencies.get_dependencies(name)
                # Format with an explicit tuple: `"%r" % requires` would
                # raise TypeError (or print only part of the value) when
                # `requires` is itself a tuple, and the handler below would
                # then silently drop the package.
                print("%s depends on %r" % (name, requires))
            except Exception as e:
                print(str(e))
                continue

            # NOTE(review): the membership test above uses `package` while
            # the entry is stored under result['name']; if the two can
            # differ the package is re-ingested on every run -- confirm.
            d['packages'][name] = result
            d['packages'][name]['dependencies'] = requires

            d.sync()
    print("Complete!")
Beispiel #3
0
def load_all(dataset_dir, doc_ids=None, filter_types=None, filter_senses=None):
    """Load the whole CoNLL16st dataset, organised by document id.

    `doc_ids` limits which parses are loaded; afterwards it is replaced by the
    sorted keys of the loaded parses, and that list is what the remaining
    loaders receive and what is returned.

    Gold relations are loaded with senses first. Only when that result is
    empty are they loaded a second time without senses; the second result is
    used solely for the relation parts.

    Returns, in this order: doc_ids, words, word_metas, pos_tags,
    dependencies, parsetrees, rel_ids, rel_parts, rel_types, rel_senses,
    relations_gold.
    """

    def _gold_relations(with_senses):
        # Same request both times; only the `with_senses` flag varies.
        return load_relations_gold(dataset_dir,
                                   doc_ids=doc_ids,
                                   with_senses=with_senses,
                                   filter_types=filter_types,
                                   filter_senses=filter_senses)

    # load all provided files untouched
    all_parses = load_parses(dataset_dir, doc_ids=doc_ids)
    doc_ids = sorted(all_parses.keys())
    all_raws = load_raws(dataset_dir, doc_ids=doc_ids)
    relations_gold = _gold_relations(True)
    if not relations_gold:
        parts_source = _gold_relations(False)
    else:
        parts_source = relations_gold

    # data keyed by document id and token id
    words = get_words(all_parses)
    pos_tags = get_pos_tags(all_parses)
    word_metas = get_word_metas(all_parses, all_raws)

    # data keyed by document id and token id pairs
    dependencies = get_dependencies(all_parses)

    # data keyed by document id
    parsetrees = get_parsetrees(all_parses)

    # data keyed by relation id
    rel_parts = get_rel_parts(parts_source)
    rel_ids = sorted(rel_parts.keys())
    rel_types = get_rel_types(relations_gold)
    rel_senses = get_rel_senses(relations_gold)

    # add extra fields (result of the call is not used)
    add_relation_tags(word_metas, rel_types, rel_senses)

    loaded = (doc_ids, words, word_metas, pos_tags, dependencies, parsetrees,
              rel_ids, rel_parts, rel_types, rel_senses, relations_gold)
    return loaded
Beispiel #4
0
import dependencies

# Print the dependencies of "packages" as a Graphviz DOT graph on stdout.
# The result of get_dependencies is used as a mapping from a package to an
# iterable of the packages it depends on.
dep_map = dependencies.get_dependencies("packages")

# `->` edges are only legal inside a `digraph`; an undirected `graph` with
# `->` is rejected by Graphviz.
print("digraph deps {")
for pkg, deps in dep_map.items():
    for dep in deps:
        # Quote node names: unquoted DOT identifiers may not contain
        # characters such as '-' or '.', which are common in package names.
        print('\t"%s" -> "%s";' % (pkg, dep))
print("}")
Beispiel #5
0
def install_deps():
    """Install the required packages with yum.

    Builds the command ``/usr/bin/yum -e1 -y install`` and hands it, together
    with the value returned by ``dependencies.get_dependencies()``, to
    ``__call``, supplying the message to use on failure.
    """
    # Roughly `yum -y install <dependencies>`
    yum_command = ['/usr/bin/yum', '-e1', '-y', 'install']
    required = dependencies.get_dependencies()
    __call(
        yum_command,
        required,
        fail_message="Unable to install required packages.",
    )