Python ScoreRecord.ScoreRecord Examples

Programming Language: Python

Namespace/Package Name: clustering

Class/Type: ScoreRecord

Method/Function: ScoreRecord

Examples at hotexamples.com: 2

Python ScoreRecord.ScoreRecord - 2 examples found. These are the top rated real world Python examples of clustering.ScoreRecord.ScoreRecord extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ScoreRecord(2)

has_keyword(1)

write_to_es(1)

Example #1

Show file

File: analyze_tweet_file.py Project: nagyistge/GEQE

def analyze_recent(tweet_file_path, analyze_points, models, es_url=None):
    if es_url == None:
        es = elasticsearch.Elasticsearch()
    else:
        es = elasticsearch.Elasticsearch([es_url])

    files = sorted(
        os.listdir(tweet_file_path),
        key=lambda x: os.stat(os.path.join(tweet_file_path, x)).st_mtime)
    for file in files:
        if file in ["analyzed", "live_stream"]:
            continue
        print "analyzing file:", file
        d0 = open(tweet_file_path + "/" + file)
        for line in d0:
            sr = ScoreRecord(line)
            sr.write_to_es("jag_geqestream_documents", "post", es)
        os.rename(tweet_file_path + "/" + file,
                  tweet_file_path + "/analyzed/" + file)

    query = {
        "filter": {
            "bool": {
                "must": [{
                    "range": {
                        "post_date": {
                            "gte": "now-1h"
                        }
                    }
                }]
            }
        }
    }
    n_hits = es.search(index="jag_geqestream_documents",
                       doc_type="post",
                       body=query,
                       search_type="count")['hits']['total']
    scanResp = es.search(index="jag_geqestream_documents",
                         doc_type="post",
                         body=query,
                         search_type="scan",
                         scroll="10m")
    scrollId = scanResp['_scroll_id']
    response = es.scroll(scroll_id=scrollId, scroll="10m")
    bins = {}
    print "\tAnalyzing", n_hits, "hits"
    while n_hits > 0:
        n_hits = n_hits - len(response["hits"]["hits"])
        for hit in response["hits"]["hits"]:
            sr = ScoreRecord(hit, 1)
            k = rec_to_key(sr)
            if k in bins.keys():
                bins[k].add_record(sr)
            else:
                bins[k] = ScoreBin(sr)
        if n_hits > 0:
            response = es.scroll(scroll_id=scrollId, scroll="10m")

    full_bins = filter(lambda x: x.bin_size() > 5, bins.values())
    print "\tScoring", len(full_bins), "bins"
    for fb in full_bins:
        for k, v in models.iteritems():
            fb.apply_model(k, v)
        if len(fb.model_scores.keys()) > 0:
            write_rec = False
            for score in fb.model_scores.values():
                if score > 0.5:
                    write_rec = True
            if write_rec == True:
                fb.save_score(es, "jag_geqestream_points", "post")

Example #2

Show file

File: loadData.py Project: nagyistge/GEQE

import sys