def wordVec_facebook(sents, path_w, name_w, win_size):
    """Train a Word2Vec model on Facebook sentences and write the
    normalized embedding vectors to a tab-separated output file.

    sents: list of raw sentence strings; each is whitespace-tokenized.
    path_w, name_w: output location and base name passed to write_file;
        win_size is appended to the file name (e.g. name_100).
    win_size: passed as the Word2Vec `size` argument, i.e. the embedding
        DIMENSIONALITY, not the context window (window is fixed at 5).
    """
    list_all = list()
    for i in range(0, len(sents)):
        split_sent = sents[i].split()
        tokens = list()
        for token in split_sent:
            # Clean the token; 'model' selects the filter's mode. Empty
            # results are dropped, survivors are lower-cased.
            token_filter = filter_eachTok_rmLinks(token, 'model')
            if len(token_filter) > 0:
                tokens.append(token_filter.lower())
        print i
        list_all.append(tokens)

    model = gensim.models.Word2Vec(list_all, size=win_size, window=5, min_count=1, workers=5)
    # Sanity check on the trained model. NOTE: in the old gensim API
    # most_similar() also calls init_sims(), which populates syn0norm
    # read in the loop below — do not remove or reorder this call.
    print model.most_similar(['bus'])

    list_write = list()
    for i in range(0, len(model.index2word)):
        # print model.index2word[i], model.syn0norm[i]
        # One output line per vocabulary word: word \t v1 \t v2 ...
        line = model.index2word[i]
        for value in model.syn0norm[i]:
            line += '\t' + str(value)
        line = line.strip()
        list_write.append(line)
        print line
    write_file(path_w, name_w + '_%i' % win_size, list_write)
def load_facebook(texts):
    """Extract and clean the post body from tab-separated Facebook rows.

    texts: iterable of raw lines; column index 2 (0-based) holds the
        post text. A line with fewer than three columns raises IndexError.
    Returns a list with one cleaned, lower-cased post string per line.
    """
    posts = list()
    for t in texts:
        split_t = t.split("\t")
        # ' '.join replaces the original quadratic += concatenation;
        # strip() removes the edge whitespace exactly as before.
        post = " ".join(filter_eachTok_rmLinks(w, "model")
                        for w in split_t[2].split())
        posts.append(post.strip().lower())
    return posts
def load_tweets(texts):
    """Extract and clean the tweet body from tab-separated Twitter rows.

    texts: iterable of raw lines; column index 2 (0-based) holds the
        tweet text. A line with fewer than three columns raises IndexError.
    Returns a list with one cleaned, lower-cased tweet string per line.
    """
    tweets = list()
    for t in texts:
        split_t = t.split("\t")
        # ' '.join replaces the original quadratic += concatenation;
        # strip() removes the edge whitespace exactly as before.
        tweet = " ".join(filter_eachTok_rmLinks(w, "twitter")
                         for w in split_t[2].split())
        tweets.append(tweet.strip().lower())
    return tweets
def check_token(token, command):
    """Return True when `token` is filtered down to nothing for the given
    data source, False when something survives the filter.

    command: 'twitter' | 'sgforums' | 'facebook'. 'sgforums' uses the
        punctuation-only filter (filter_eachToken); 'twitter' and
        'facebook' also strip links (filter_eachTok_rmLinks). Any other
        command falls through and returns None (original behavior,
        preserved for compatibility with existing callers).
    """
    if command == 'twitter' or command == 'facebook':
        # Both sources share the link-stripping filter; the branches in
        # the original were duplicates.
        text = filter_eachTok_rmLinks(token, command)
        return len(text.strip()) == 0
    if command == 'sgforums':
        text = filter_eachToken(token, command)
        return len(text.strip()) == 0
def filtering_text_demo(list_line, command):
    """Clean each line for the demo output.

    For every input line, tokens that survive filter_eachTok_rmLinks are
    joined with tabs; each cleaned line is followed by two '\n' marker
    entries in the returned list.
    """
    list_demo = list()
    for raw_line in list_line:
        kept = []
        for tok in raw_line.split():
            filtered = filter_eachTok_rmLinks(tok, command)
            if filtered:
                kept.append(filtered)
        # strip() matches the original trailing-tab removal semantics.
        list_demo.append('\t'.join(kept).strip())
        list_demo.append('\n')
        list_demo.append('\n')
    return list_demo
def filterText_demo(list_line, command, command_data):
    list_convert = list()
    for i in range(0, len(list_line)):
        text = ''
        split_text = list_line[i].strip().split()
        for token_ in split_text:

            if command == 'removePunc':  # remove all punctuations
                token_filter = filter_eachToken(token_, command_data)
            elif command == 'removeLink':  # remove all punctuations and links in token
                token_filter = filter_eachTok_rmLinks(token_, command_data)
            else:
                print 'You need to give the correct command'
                quit()

            if len(token_filter) != 0:
                text += token_filter + '\t'

        list_convert.append(text.strip())
    return list_convert