def getFreqRepr(X, D):
    """Build per-word frequency and feature mappings from X.

    X must expose ``words`` (iterable of hashable words), ``freq`` (indexable
    by word position) and ``features`` (indexable by an ``(i, j)`` pair).
    D is the number of feature indices copied for each word.

    Returns a pair ``(freqX, reprX)`` of OrderedDicts keyed by word, following
    the order of ``X.words``: ``freqX[w]`` is ``X.freq[i]`` and ``reprX[w]`` is
    the dict ``{j: X.features[i, j]}`` for ``j`` in ``0 .. D-1``.  A word that
    occurs more than once keeps its first position but takes the values of
    its last occurrence.
    """
    # range() exists on both Python 2 and Python 3 (xrange is Python 2 only).
    # The index sequence does not depend on the word, so build it once.
    dims = range(D)
    freqX = OrderedDict()
    reprX = OrderedDict()
    for i, w in enumerate(X.words):
        freqX[w] = X.freq[i]
        reprX[w] = {j: X.features[i, j] for j in dims}
    return freqX, reprX

# Script entry point: build mock X/Y data and write it out in several formats.
if __name__ == '__main__':
    # Positional command-line arguments, all parsed as integers: N, D, Nseed.
    N, D, Nseed = int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3])

    # Generate the mock data; pi is returned by make() but not used below.
    X, Y, pi = make(N, Nseed, D)
    # Seed pairs are (i, i) for every index i below Nseed.
    seed = list(zip(range(Nseed), range(Nseed)))

    # Pickled word files, built from the frequency / feature mappings.
    freqX, reprX = getFreqRepr(X, D)
    freqY, reprY = getFreqRepr(Y, D)
    IO.writePickledWords('pockX.txt', freqX, reprX)
    IO.writePickledWords('pockY.txt', freqY, reprY)

    # Word files for both data sets, then the seed file.
    IO.writeWords('mockX.txt', X)
    IO.writeWords('mockY.txt', Y)
    IO.writeSeed('seedXY.txt', seed)

    # A parenthesised single-argument print emits the same text on Python 2.
    print(X.asTuple())
    # NOTE(review): the original ended with the remark "now need to save";
    # no further save step follows in this block.


        freq = top_nouns[word]
        sys.stdout.write(str(freq))
        sys.stdout.write(',')
        V = [context_features[word][other_word] for other_word in noun_keys]
        print ','.join([str(v) for v in V])


# Script entry point: extract the top N nouns and write them out pickled.
if __name__ == '__main__':
    # Plain module-level assignment; a `global` statement is a no-op at
    # module scope, so none is needed here.
    verbosity = 0

    # Positional command-line arguments:
    #   1: text filename, 2: tags filename, 3: N (integer), 4: language code.
    filename_text = sys.argv[1]
    filename_tags = sys.argv[2]
    N = int(sys.argv[3])
    lang = sys.argv[4]

    # Tags accepted for each supported language (presumably part-of-speech
    # tags for nouns -- confirm against the tagger that produced the tags file).
    tags_by_lang = {
        'en': ['NN', 'NNS', 'NP', 'NPS'],
        'es': ['NC', 'NP'],
    }
    # Validate explicitly rather than with `assert`: assertions are stripped
    # under `python -O`, which would leave accept_tags unbound further down.
    if lang not in tags_by_lang:
        raise ValueError(
            "unsupported language %r; expected 'en' or 'es'" % (lang,))
    accept_tags = tags_by_lang[lang]
    out_filename = lang + '_' + 'pickled_N=' + str(N) + '.txt'

    common.log(100, 'Extracting', N, 'top nouns', '-- accepted tags:', accept_tags)
    top_nouns_freq, context_features, feature_names = extract(filename_text, filename_tags, accept_tags, N)

    # Re-key the context features in the iteration order of top_nouns_freq
    # (the original comment describes that order as descending frequency).
    context_features0 = OrderedDict(
        (noun, context_features[noun]) for noun in top_nouns_freq)

    IO.writePickledWords(out_filename, top_nouns_freq, context_features0, feature_names)