예제 #1
0
def get_month_after_limited(percent_change_file):
    """Return the months following the best and worst months, capped per year.

    The mapping returned by ``load_percent_change`` is ranked by value and
    the top and bottom sixth of its keys are selected.  Each selected key is
    replaced by the first day of the following calendar month.  At most two
    keys are kept per calendar year (the year of the original key, not of the
    shifted month); the best/worst ones win because each group is walked from
    most extreme to least extreme.

    Every (key, value) pair is also printed in ascending value order.

    Args:
        percent_change_file: passed straight to ``load_percent_change``.
            NOTE(review): the result is assumed to be a mapping from
            date-like keys (with ``.year`` / ``.month``) to sortable values.

    Returns:
        A ``(good, bad)`` tuple of lists of ``date`` objects.
    """
    econ_seq = load_percent_change(percent_change_file)
    sorted_econ = sorted(econ_seq, key=econ_seq.get)

    for x in sorted_econ:
        print(x, econ_seq[x])

    # Take the top sixth as good months and the bottom sixth as bad months.
    num = len(econ_seq) // 6
    prev_bad = sorted_econ[:num]
    # Guard num == 0: ``sorted_econ[-0:]`` is the whole list, not an empty one.
    prev_good = sorted_econ[-num:] if num > 0 else []
    # Walk the good months from best to worst so the per-year cap keeps the
    # strongest ones (prev_bad is already ordered worst-first).
    prev_good = reversed(prev_good)

    def plus_month(seq):
        """Map each date to the first of the next month, max two per year."""
        new_seq = []
        year_to_count = defaultdict(int)
        for x in seq:
            if year_to_count[x.year] >= 2:
                continue
            year_to_count[x.year] += 1
            # Calendar arithmetic instead of "+31 days": a fixed 31-day step
            # skips a whole month for dates late in the month (e.g. Jan 31).
            if x.month == 12:
                new_seq.append(date(x.year + 1, 1, 1))
            else:
                new_seq.append(date(x.year, x.month + 1, 1))
        return new_seq

    return plus_month(prev_good), plus_month(prev_bad)
예제 #2
0
def get_same_month(percent_change_file):
    """Return the best and worst months themselves, ranked by percent change.

    The mapping returned by ``load_percent_change`` is sorted by value; the
    top sixth of its keys are the "good" months and the bottom sixth the
    "bad" months.

    Args:
        percent_change_file: passed straight to ``load_percent_change``.
            NOTE(review): the result is assumed to be a mapping whose values
            are mutually comparable.

    Returns:
        A ``(good, bad)`` tuple of lists of keys, both in ascending value
        order.  Both lists are empty when there are fewer than six entries.
    """
    econ_seq = load_percent_change(percent_change_file)
    sorted_econ = sorted(econ_seq, key=econ_seq.get)

    # Take the top sixth as good months and the bottom sixth as bad months.
    num = len(econ_seq) // 6
    prev_bad = sorted_econ[:num]
    # Guard num == 0: ``sorted_econ[-0:]`` is the whole list, not an empty one.
    prev_good = sorted_econ[-num:] if num > 0 else []

    return prev_good, prev_bad
예제 #3
0
def get_month_just_after(percent_change_file):
    """Return the months immediately following the best and worst months.

    The mapping returned by ``load_percent_change`` is sorted by value; the
    top sixth of its keys are treated as good months and the bottom sixth as
    bad months.  Each selected key is replaced by the first day of the
    following calendar month.

    Args:
        percent_change_file: passed straight to ``load_percent_change``.
            NOTE(review): the result is assumed to be a mapping from
            date-like keys (with ``.year`` / ``.month``) to sortable values.

    Returns:
        A ``(good, bad)`` tuple of lists of ``date`` objects, each in
        ascending order of the originating month's value.
    """
    econ_seq = load_percent_change(percent_change_file)
    sorted_econ = sorted(econ_seq, key=econ_seq.get)

    # Take the top sixth as good months and the bottom sixth as bad months.
    num = len(econ_seq) // 6
    prev_bad = sorted_econ[:num]
    # Guard num == 0: ``sorted_econ[-0:]`` is the whole list, not an empty one.
    prev_good = sorted_econ[-num:] if num > 0 else []

    def plus_month(seq):
        """Map each date to the first day of the next calendar month."""
        # Calendar arithmetic instead of "+31 days": a fixed 31-day step
        # skips a whole month for dates late in the month (e.g. Jan 31).
        return [
            date(x.year + 1, 1, 1) if x.month == 12
            else date(x.year, x.month + 1, 1)
            for x in seq
        ]

    return plus_month(prev_good), plus_month(prev_bad)
예제 #4
0
def get_good_month_prev(percent_change_file, percent=6):
    """Label the months after the strongest upturns "good" and the upturns "bad".

    The mapping returned by ``load_percent_change`` is sorted by value and
    the top ``1 / percent`` fraction of its keys (the upturn months) is
    selected.  The month following each upturn is returned as "good" and the
    upturn month itself as "bad".

    Args:
        percent_change_file: passed straight to ``load_percent_change``.
            NOTE(review): the result is assumed to be a mapping from
            date-like keys (with ``.year`` / ``.month``) to sortable values.
        percent: despite the name this is a divisor, not a percentage; the
            default of 6 selects the top sixth.  Zero raises
            ``ZeroDivisionError``.

    Returns:
        A ``(good, bad)`` tuple: ``good`` is a list of ``date`` objects (the
        first day of the month after each upturn) and ``bad`` is the list of
        upturn keys, both in ascending value order.
    """
    econ_seq = load_percent_change(percent_change_file)
    sorted_econ = sorted(econ_seq, key=econ_seq.get)

    # Select the top 1/percent of months by value (the upturn months).
    num = int(len(econ_seq) / percent)
    # Guard num == 0: ``sorted_econ[-0:]`` is the whole list, not an empty one.
    prev_good = sorted_econ[-num:] if num > 0 else []

    def plus_month(seq):
        """Map each date to the first day of the next calendar month."""
        # Calendar arithmetic instead of "+31 days": a fixed 31-day step
        # skips a whole month for dates late in the month (e.g. Jan 31).
        return [
            date(x.year + 1, 1, 1) if x.month == 12
            else date(x.year, x.month + 1, 1)
            for x in seq
        ]

    # Take the month of the upturn as "bad" and the next month as "good".
    # NOTE: an unreachable alternative labelling (upturn as "good", previous
    # month as "bad") was dropped; it called an undefined ``minus_month``.
    return plus_month(prev_good), prev_good
예제 #5
0
def main():
    """Print per-frame statistics relating month-to-month log-odds to the economy.

    Steps, in order:
      1. Parse ``--percent_change``, ``--input_path`` and ``--keywords``.
      2. Read the keyword list (one stripped entry per line of the file).
      3. Load the percent-change mapping and flatten it to a list of values
         ordered by sorted key.
      4. Load the background corpus and the ``frame_to_lex.pickle`` lexicon
         from the current working directory.
      5. For every monthly file after the first, compute log-odds between the
         previous and current month (external and internal counts
         separately) and sum them over each frame's lexicon words.
      6. Call ``print_stats`` for each frame, first for the external and then
         for the internal series.

    NOTE(review): the frame series get one entry per consecutive pair of
    months while ``econ_seq`` has one entry per key of the percent-change
    mapping; confirm that ``print_stats`` aligns the two.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--percent_change",
        default=
        "/usr1/home/anjalief/corpora/russian/percent_change/russian_rtsi_rub.csv"
    )
    parser.add_argument(
        "--input_path",
        default="/usr1/home/anjalief/corpora/russian/yearly_mod_subs/iz_lower/"
    )
    parser.add_argument("--keywords", default="./keywords.txt")
    args = parser.parse_args()

    # DANGER DANGER DANGER
    # NOTE(review): the original author flagged this step; the reason is not
    # visible from this function.
    # A context manager closes the handle instead of leaking it.
    with open(args.keywords) as keyword_file:
        keywords = [line.strip() for line in keyword_file]

    econ_dict = load_percent_change(args.percent_change)
    econ_seq = [econ_dict[d] for d in sorted(econ_dict)]

    prior = LoadBackgroundCorpus(args.input_path)
    # SECURITY: pickle can execute arbitrary code while loading; only run
    # this against a frame_to_lex.pickle that you produced yourself.
    with open("frame_to_lex.pickle", "rb") as lex_file:
        frame_to_lex = pickle.load(lex_file)

    date_seq, filenames = get_files_by_time_slice(args.input_path, "monthly")
    prev_e = None
    prev_i = None

    frame_to_seq_e = defaultdict(list)
    frame_to_seq_i = defaultdict(list)

    # zip() is kept so iteration still stops at the shorter of the two lists.
    for _, filename in zip(date_seq, filenames):
        curr_e, curr_i, _, _, _ = LoadCountsExternal(filename, keywords)

        # The first usable month only seeds the "previous" counts.  Note that
        # a falsy (e.g. empty) prev_e is treated like a missing one.
        if not prev_e:
            prev_e = curr_e
            prev_i = curr_i
            continue

        delta_e = write_log_odds(prev_e, curr_e, prior)
        delta_i = write_log_odds(prev_i, curr_i, prior)

        for c in frame_to_lex:
            summary_e = 0
            summary_i = 0
            for word in frame_to_lex[c]:
                if word in delta_e:
                    summary_e += delta_e[word]
                else:
                    print("E Skipping ", word)
                if word in delta_i:
                    summary_i += delta_i[word]
                else:
                    print("I Skipping ", word)
            frame_to_seq_e[c].append(summary_e)
            frame_to_seq_i[c].append(summary_i)

        prev_e = curr_e
        prev_i = curr_i

    # Done processing files
    rule = "-------------------------------------------------------------------------------"
    frame_end = "******************************************************************************"

    def report(title, frame_to_seq):
        """Print a titled banner followed by the stats of every frame."""
        print(rule)
        print(title)
        print(rule)
        for c in frame_to_seq:
            print(c)
            print_stats(econ_seq, frame_to_seq[c])
            print(frame_end)

    report("EXTERNAL", frame_to_seq_e)
    report("INTERNAL", frame_to_seq_i)