def compressTweets(data_dir):
    """Rewrite every tweet file in ``data_dir`` in a more compact form.

    Each entry returned by ``os.listdir(data_dir)`` is read as one JSON
    document per line.  The first line that parses keeps its ``'user'``
    entry; every later line has ``'user'`` removed.  Lines that are not
    valid JSON are dropped.  The file is then overwritten in place with the
    surviving records, one per line, without a trailing newline.

    A later line that has no ``'user'`` entry is kept unchanged, so running
    the function again on an already compressed directory does not lose
    records.

    ``Tick`` is defined elsewhere in the project; its ``tick()`` method is
    called once per processed file.

    Args:
        data_dir: Path of the directory whose files are rewritten.

    Raises:
        AssertionError: If ``data_dir`` does not exist.
    """
    assert os.path.exists(data_dir), "no such path: %r" % (data_dir,)

    tick = Tick()

    for fname in os.listdir(data_dir):
        path = os.path.join(data_dir, fname)
        is_first = True
        lines = []

        # Read the whole file and close the handle before the same path is
        # reopened for writing below.
        with open(path) as fin:
            for line in fin:
                try:
                    jobj = json.loads(line)
                except ValueError:
                    # Not valid JSON (this includes blank lines): drop it.
                    continue

                if not is_first:
                    # pop() instead of del: a record that already lacks
                    # 'user' must be kept, otherwise a second run over the
                    # same directory would discard all but the first record.
                    jobj.pop('user', None)

                lines.append(json.dumps(jobj))
                # Only a successfully parsed line counts as "the first".
                is_first = False

        with open(path, 'w') as fout:
            fout.write('\n'.join(lines))

        tick.tick()
def tweets2Texts(input_dir, output_dir):
    """Extract the tweet texts of every file in ``input_dir``.

    Each entry returned by ``os.listdir(input_dir)`` is read as one JSON
    document per line.  For every line that parses and has a ``"text"``
    entry, each run of whitespace in the text is replaced by a single
    space, so one tweet occupies exactly one output line.  The texts are
    joined with newlines, characters that cannot be encoded as ASCII are
    discarded, and the result is written to a file of the same name in
    ``output_dir``.  Lines that are not valid JSON or have no ``"text"``
    entry are skipped.

    ``Tick`` is defined elsewhere in the project; its ``tick()`` method is
    called once per processed file.

    Args:
        input_dir: Directory containing the tweet files to read.
        output_dir: Directory the text files are written to.  It is not
            created by this function.
    """
    ticker = Tick()
    whitespace = re.compile(r"\s+")

    # The file name is reused unchanged for the output file (the original
    # code referred to it as the user id).
    for fname in os.listdir(input_dir):
        # collect texts
        texts = []
        with open(os.path.join(input_dir, fname)) as fin:
            for line in fin:
                try:
                    raw_text = json.loads(line)["text"]
                except (KeyError, ValueError):
                    # Malformed JSON or a record without "text": skip it.
                    continue
                texts.append(whitespace.sub(" ", raw_text))

        # encode() yields bytes, so the file is opened in binary mode; a
        # text-mode handle rejects bytes on Python 3.
        payload = "\n".join(texts).encode("ascii", "ignore")
        with open(os.path.join(output_dir, fname), "wb") as fout:
            fout.write(payload)

        ticker.tick()