Python extractOriginStatus Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: extractOriginStatus

Examples at hotexamples.com: 2

Python extractOriginStatus - 2 examples found. These are the top-rated real-world Python examples of utils.extractOriginStatus, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: readStatus.py Project: ifff/microblogfilter

def parseStreamFromTime(year, month, day, streamDir, parsedDir):
    """Parse hourly gzipped status logs, starting at hour 0 of the given day.

    For each hour in turn, reads ``<streamDir>/statuses.log.<YYYY-MM-DD-HH>.gz``
    and writes ``<parsedDir>/statuses.parsed.<YYYY-MM-DD-HH>``. Every output
    line is ``extractStatus(line)``, a tab, ``extractOriginStatus(line)`` and a
    newline. Input lines for which ``extractStatus`` returns ``""`` are skipped.

    When the gzip file for the current hour does not exist yet, the function
    prints a notice, sleeps 30 seconds and checks again. The loop has no exit
    condition, so it runs until interrupted or until an exception propagates.

    Args:
        year, month, day: Calendar date of the first hour to parse.
        streamDir: Directory holding the ``statuses.log.*.gz`` input files.
        parsedDir: Directory that receives the ``statuses.parsed.*`` files.
    """
    cur_time = datetime.datetime(year, month, day, 0)
    one_hour = datetime.timedelta(hours=1)

    while True:
        cur_suffix = cur_time.strftime("%Y-%m-%d-%H")
        cur_stream_path = "%s/statuses.log.%s.gz" % (streamDir, cur_suffix)
        cur_parsed_path = "%s/statuses.parsed.%s" % (parsedDir, cur_suffix)

        if not os.path.exists(cur_stream_path):
            # The gzip file for this hour has not appeared yet: wait and retry.
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("current parsed stream file is %s (not ready), sleep to wait..." % cur_stream_path)
            time.sleep(30)
            continue

        print("current parsed stream file is %s (ready)" % cur_stream_path)
        # Both files are managed by `with`, so they are closed even when one of
        # the extract helpers raises part-way through the hour.
        with open(cur_parsed_path, "w") as cur_parsed_file:
            with gzip.open(cur_stream_path, "rt") as cur_stream_file:
                for line in cur_stream_file:
                    out_str = extractStatus(line)
                    if out_str == "":
                        # Nothing left after extractStatus processed the tweet.
                        continue
                    origin_str = extractOriginStatus(line)
                    cur_parsed_file.write(out_str + "\t" + origin_str + "\n")

        cur_time = cur_time + one_hour  # move on to the next hourly file

Example #2

Show file

File: readStatus.py Project: ifff/microblogfilter

def parseCurStream():
    """Follow the live hourly status log and write parsed lines as they arrive.

    Reads ``../statuses.log.<YYYY-MM-DD-HH>`` for the current hour line by
    line. Each line whose ``extractStatus`` result is non-empty is written to
    the parsed file as ``extractStatus(line)``, a tab,
    ``extractOriginStatus(line)`` and a newline. When the end of the current
    file is reached and the next hour's log file already exists, the function
    switches to the log file named after the then-current wall-clock hour.

    The parsed file starts as ``./parsed/statuses.parsed.<YYYY-MM-DD-HH>`` and
    is renamed to ``./parsed/statuses.parsed.<YYYY-MM-DD>`` when the day
    changes. The loop has no exit condition, so it runs until interrupted or
    until an exception propagates.

    NOTE(review): the parsed path only changes on a day rollover and the file
    is opened with mode "w" for every hourly stream, so each hour overwrites
    the previous hour's output. Confirm whether append mode or per-hour output
    names were intended before changing it.
    """
    # Function-scope import: only the polling sleeps below need it, and the
    # module's import block is not visible from this function.
    import time

    poll_seconds = 1  # pause between checks instead of spinning the CPU

    cur_time = datetime.datetime.now()
    suffix1 = cur_time.strftime("%Y-%m-%d-%H")
    cur_day = cur_time.strftime("%d")
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("current parsed stream is %s" % suffix1)
    cur_stream_path = "../statuses.log.%s" % suffix1
    cur_parsed_path = "./parsed/statuses.parsed.%s" % suffix1

    # next hour stream
    next_time = cur_time + datetime.timedelta(hours=1)
    next_stream_path = "../statuses.log.%s" % next_time.strftime("%Y-%m-%d-%H")

    while True:
        if not os.path.exists(cur_stream_path):
            # Current log has not been created yet; wait for it.
            time.sleep(poll_seconds)
            continue

        # `with` guarantees both handles are closed, also on an exception.
        with open(cur_stream_path, "r") as cur_stream_file:
            with open(cur_parsed_path, "w") as cur_parsed_file:
                while True:
                    line = cur_stream_file.readline()
                    if not line:
                        # End of what has been written so far. The original
                        # called the non-existent os.exists() here, which
                        # raised AttributeError on the first EOF.
                        if os.path.exists(next_stream_path):
                            break  # change to next hour
                        time.sleep(poll_seconds)
                        continue
                    out_str = extractStatus(line)
                    if out_str == "":
                        # Nothing left after extractStatus processed the tweet.
                        continue
                    origin_str = extractOriginStatus(line)
                    cur_parsed_file.write(out_str + "\t" + origin_str + "\n")

        # Re-read the clock and point at the stream for the current hour.
        cur_time = datetime.datetime.now()
        suffix1 = cur_time.strftime("%Y-%m-%d-%H")
        cur_stream_path = "../statuses.log.%s" % suffix1
        print("current parsed stream is %s" % suffix1)
        next_time = cur_time + datetime.timedelta(hours=1)
        next_stream_path = "../statuses.log.%s" % next_time.strftime("%Y-%m-%d-%H")

        # check whether day changes
        new_day = cur_time.strftime("%d")
        if new_day != cur_day:
            # Remember the new day so later rollovers are detected as well.
            cur_day = new_day
            # Only the path is updated here; the file itself is opened at the
            # top of the loop. The original also opened it at this point and
            # leaked that handle when the loop rebound the name.
            cur_parsed_path = "./parsed/statuses.parsed.%s" % cur_time.strftime("%Y-%m-%d")