from gensim import corpora, models, similarities
from gensim.corpora import SvmLightCorpus

input_file = sys.argv[1]
output_file = sys.argv[2]
try:
	num_topics = int( sys.argv[3] )
except IndexError:
	num_topics = 50

###

print "loading data..."
print time.strftime("%H:%M:%S", time.localtime())

c = SvmLightCorpus( input_file )

print "starting tf-idf..."
print

tfidf = models.TfidfModel( c )
c_tfidf = tfidf[c]

print "running RP..."
print time.strftime("%H:%M:%S", time.localtime())
print

rp = models.RpModel( c_tfidf, num_topics = num_topics )

print "converting corpus to RP..."
print time.strftime("%H:%M:%S", time.localtime())
import sys, time
from gensim import corpora, models, similarities
from gensim.corpora import SvmLightCorpus

input_file = sys.argv[1]
output_file = sys.argv[2]

###

print "loading data..."
print time.strftime("%H:%M:%S", time.localtime())

c = SvmLightCorpus( input_file )

print "starting tf-idf..."
print

tfidf = models.TfidfModel( c )
c_tfidf = tfidf[c]

print "saving..."
print

SvmLightCorpus.serialize( output_file, c_tfidf  )

print "done."
print time.strftime("%H:%M:%S", time.localtime())