import re

import numpy as np
from nltk.tokenize import WordPunctTokenizer


def word2id(sentences, word2idx, seq_length):
    """Convert sentences to fixed-length arrays of word ids.

    Sentences shorter than seq_length are padded with the '<0>' token and
    longer ones are truncated. Returns the id matrix and the unpadded
    length of each sentence.
    """
    idx = []
    all_length = []
    # get_id looks words up through this module-level variable.
    global word2idx_
    word2idx_ = word2idx
    for sentence in sentences:
        try:
            # Accept both bytes and str input.
            if isinstance(sentence, bytes):
                sentence = sentence.decode('utf-8')
            # Replace punctuation with spaces, then split into tokens.
            sentence = re.sub(punc, u' ', sentence.strip()).strip()
            words = WordPunctTokenizer().tokenize(sentence)
        except Exception:
            # Skip sentences that cannot be decoded or tokenized.
            print(sentence)
            continue
        if len(words) < seq_length:
            all_length.append(len(words))
            # Pad short sentences with the '<0>' placeholder token.
            for _ in range(len(words), seq_length):
                words.append('<0>')
        elif len(words) > seq_length:
            # Truncate long sentences to seq_length.
            words = words[:seq_length]
            all_length.append(seq_length)
        else:
            all_length.append(seq_length)
        word_ids = list(map(get_id, words))
        idx.append(word_ids)
    return np.array(idx), np.array(all_length)
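

# Assumed helpers (a minimal sketch, not from the original file): word2id
# relies on a module-level punctuation pattern `punc` and an id-lookup
# function `get_id` that reads the `word2idx_` global set inside word2id.
# These are presumably defined elsewhere in the project; the versions below
# only illustrate one plausible implementation.
punc = re.compile(r"[^\w\s]", re.UNICODE)  # characters replaced with spaces before tokenizing


def get_id(word):
    # Fall back to the padding token's id for out-of-vocabulary words.
    return word2idx_.get(word, word2idx_.get('<0>', 0))


if __name__ == '__main__':
    # Example: pad a two-word sentence to seq_length 5 (hypothetical vocabulary).
    vocab = {'<0>': 0, 'hello': 1, 'world': 2}
    ids, lengths = word2id([u'hello, world!'], vocab, seq_length=5)
    print(ids)      # [[1 2 0 0 0]]
    print(lengths)  # [2]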