Python recluster Examples

Programming Language: Python

Namespace/Package Name: recluster

Method/Function: recluster

Examples at hotexamples.com: 4

Python recluster - 4 examples found. These are the top-rated real-world Python examples of recluster.recluster, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: recluster_groups.py Project: HeinrichHartmann/OpenCitationsCorpus

def run(input_filename, output_filename):
    reader = itertools.imap(lambda a:Article(*[f.decode('utf-8') for f in a]), csv.reader(open(input_filename, 'r')))
    writer = csv.writer(open(output_filename, 'w'))

    group_counter, split_counter = 0, 0

    for i, (group_id, articles) in enumerate(itertools.groupby(reader, lambda a:a.group)):
        group_id, articles = int(group_id), list(articles)
        if i % 1000 == 0 and i:
            print "%8i %8i %8i %8i %8.5f%%" % (i, group_id, split_counter, group_counter, split_counter/group_counter*100)

        groups = list(recluster(articles))

        groups.sort(key=lambda g:-len(g))
        if len(groups) > 1 or sum(len(g) for g in groups[1:]) > 8:
            split_counter += 1
            print len(groups), sorted(map(len, groups))
#            for identifier in IDENTIFIERS:
#                print "  ", identifier, [count(getattr(a, identifier) for a in g) for g in groups]
            split(groups)


        for group in groups:
            gc = unicode(group_counter)
            for article in group:
                article = article._replace(group = gc)
                writer.writerow([f.encode('utf-8') for f in article])
            group_counter += 1

Example #2

Show file

File: recluster_groups.py Project: viveksck/OpenCitationsCorpus

def run(input_filename, output_filename):
    reader = itertools.imap(lambda a: Article(*[f.decode('utf-8') for f in a]),
                            csv.reader(open(input_filename, 'r')))
    writer = csv.writer(open(output_filename, 'w'))

    group_counter, split_counter = 0, 0

    for i, (group_id,
            articles) in enumerate(itertools.groupby(reader,
                                                     lambda a: a.group)):
        group_id, articles = int(group_id), list(articles)
        if i % 1000 == 0 and i:
            print "%8i %8i %8i %8i %8.5f%%" % (i, group_id, split_counter,
                                               group_counter, split_counter /
                                               group_counter * 100)

        groups = list(recluster(articles))

        groups.sort(key=lambda g: -len(g))
        if len(groups) > 1 or sum(len(g) for g in groups[1:]) > 8:
            split_counter += 1
            print len(groups), sorted(map(len, groups))
            #            for identifier in IDENTIFIERS:
            #                print "  ", identifier, [count(getattr(a, identifier) for a in g) for g in groups]
            split(groups)

        for group in groups:
            gc = unicode(group_counter)
            for article in group:
                article = article._replace(group=gc)
                writer.writerow([f.encode('utf-8') for f in article])
            group_counter += 1

Example #3

Show file

File: unify.py Project: HeinrichHartmann/OpenCitationsCorpus

        identifiers = [(k,v) for k,v in article._asdict().items() if k in IDENTIFIERS and v]
        data = None # dict(identifiers)
        if not identifiers:
            without_identifiers += 1
            continue
        articles[identifiers[0]].append(article)
        for identifier in identifiers[1:]:
            if articles[identifiers[0]] is not articles[identifier]:
                articles[identifiers[0]] += articles[identifier]
                articles[identifier] = articles[identifiers[0]]

        if i % 10000 == 0:
            print "%7d" % i
except:
    pass


i = 0
for group in articles.itervalues():
    groups = recluster(group)
    for group in groups:
        for article in group:
            article = article._asdict()
            article['group'] = i
            article = Article(**article)
            writer.writerow(article)
        i += 1

    if i % 10000 == 0:
        print "%7d" % i

Example #4

Show file

File: unify.py Project: viveksck/OpenCitationsCorpus

        identifiers = [(k, v) for k, v in article._asdict().items()
                       if k in IDENTIFIERS and v]
        data = None  # dict(identifiers)
        if not identifiers:
            without_identifiers += 1
            continue
        articles[identifiers[0]].append(article)
        for identifier in identifiers[1:]:
            if articles[identifiers[0]] is not articles[identifier]:
                articles[identifiers[0]] += articles[identifier]
                articles[identifier] = articles[identifiers[0]]

        if i % 10000 == 0:
            print "%7d" % i
except:
    pass

i = 0
for group in articles.itervalues():
    groups = recluster(group)
    for group in groups:
        for article in group:
            article = article._asdict()
            article['group'] = i
            article = Article(**article)
            writer.writerow(article)
        i += 1

    if i % 10000 == 0:
        print "%7d" % i