def time_seq_count():
    """Count records per time sequence and label over every file in data_dir.

    Each file in ``data_dir`` is read line by line. A line must hold exactly
    four tab-separated fields: created_at, user_id, label, text. Lines whose
    text is rejected by ``ok_without_rt`` are skipped (presumably a retweet
    filter -- confirm against its definition).

    Returns:
        A tuple ``(counts_per_time_unit, labels)``. ``labels`` is a list of
        every label seen. ``counts_per_time_unit`` maps each value returned
        by ``timeSeq(created_at)`` to a dict ``{label: count}`` that has an
        entry for every label, 0 when that label never occurred there.

    Raises:
        ValueError: if a line does not split into exactly four fields.
    """
    time_seqs_per_label = {}

    for fname in os.listdir(data_dir):
        full_fname = os.path.join(data_dir, fname)
        # The context manager closes each file as soon as it has been read,
        # instead of leaving one open handle per file to the garbage collector.
        with open(full_fname) as lines:
            for line in lines:
                created_at, user_id, label, text = line.rstrip('\n').split('\t')
                time_seq = timeSeq(created_at)

                if not ok_without_rt(text):
                    continue

                time_seqs_per_label.setdefault(label, []).append(time_seq)

    # Aggregate into a single table.
    # The labels must be a real list: they are iterated once for every new
    # time sequence below and are also returned to the caller, so a one-shot
    # iterator (what map() yields on Python 3) would be empty after first use.
    labels = list(time_seqs_per_label)

    counts_per_time_unit = {}
    for label, time_seqs in time_seqs_per_label.items():
        tally = Counter(time_seqs)
        for time_seq in sorted(tally):
            if time_seq not in counts_per_time_unit:
                # Start every row with a zero for each known label.
                counts_per_time_unit[time_seq] = dict.fromkeys(labels, 0)

            counts_per_time_unit[time_seq][label] = tally[time_seq]

    return (counts_per_time_unit, labels)
def extract_specific_time_music(week_num, music_label):
    """Return the distinct user ids recorded for a label in one time sequence.

    Each file in ``data_dir`` is read line by line. A line must hold exactly
    four tab-separated fields: created_at, user_id, label, text.

    Args:
        week_num: value compared for equality with ``timeSeq(created_at)``.
        music_label: value compared for equality with the line's label field.

    Returns:
        A list of unique user ids, in no particular order.

    Raises:
        ValueError: if a line does not split into exactly four fields.
    """
    # NOTE(review): unlike time_seq_count and extract_show, the text field is
    # not passed through ok_without_rt here -- confirm that this is intended.
    user_ids = set()

    for fname in os.listdir(data_dir):
        full_fname = os.path.join(data_dir, fname)
        # The context manager closes each file as soon as it has been read.
        with open(full_fname) as lines:
            for line in lines:
                created_at, user_id, label, text = line.rstrip('\n').split('\t')
                if timeSeq(created_at) == week_num and label == music_label:
                    user_ids.add(user_id)

    return list(user_ids)
def extract_show(show_label):
    """Return the distinct user ids recorded for a label, across all files.

    Each file in ``data_dir`` is read line by line. A line must hold exactly
    four tab-separated fields: created_at, user_id, label, text. Only lines
    whose text is accepted by ``ok_without_rt`` are considered (presumably a
    retweet filter -- confirm against its definition).

    Args:
        show_label: value compared for equality with the line's label field.

    Returns:
        A list of unique user ids, in no particular order.

    Raises:
        ValueError: if a line does not split into exactly four fields.
    """
    user_ids = set()

    for fname in os.listdir(data_dir):
        full_fname = os.path.join(data_dir, fname)
        # The context manager closes each file as soon as it has been read.
        with open(full_fname) as lines:
            for line in lines:
                created_at, user_id, label, text = line.rstrip('\n').split('\t')

                # created_at is parsed out of the line but plays no part in
                # the selection: only the text filter and the label matter.
                if ok_without_rt(text) and label == show_label:
                    user_ids.add(user_id)

    return list(user_ids)