# NOTE(review): this fragment starts inside a loop whose header is outside
# this chunk (it appears to run once per `post` -- confirm).  The original
# indentation was lost; everything down to `sys.stderr.flush()` looks like
# that loop's body and must be re-indented to the loop's level when merged.

# Merge the 4- to 9-gram terms collected for the current post into the
# running totals.
count_fourgrams.update(terms_fourgram)
count_fivegrams.update(terms_fivegram)
count_sixgrams.update(terms_sixgram)
count_sevengrams.update(terms_sevengram)
count_eightgrams.update(terms_eightgram)
count_ninegrams.update(terms_ninegram)

# Keep the extracted terms on the post itself.
post.features = terms_all
post.hashtags = terms_hashtags
post.bigrams = terms_bigram

# Progress bar: 20 cells wide, one filled cell per 5% of `limit` processed.
counter += 1
percentage = int(counter * 100 / limit)
percentageBar = int(percentage / 5)
missingPercentageBar = 20 - percentageBar

# The leading "\r" moves back to the start of the line so each write
# overdraws the previous bar instead of scrolling; flush so it shows at once.
sys.stderr.write(
    "\r¦|%s|¦%d%%. Examinated %d of %d"
    % (
        "░" * percentageBar + " " * missingPercentageBar,
        percentage,
        counter,
        limit,
    )
)
sys.stderr.flush()

# NOTE(review): presumably the first statement after the loop -- confirm.
# print() appends its own newline, so this writes two newlines to stdout.
print("\n")

# Top-N sizes taken from the command-line options, each passed through
# minValue() together with a fixed bound.
# NOTE(review): minValue is defined elsewhere; presumably it returns the
# smaller of its two arguments -- confirm.
wordsTopNum = minValue(opts.wordsTopNum, 4000)
hashtagTopNum = minValue(opts.hashtagTopNum, 3000)
ngramsTopNum = minValue(opts.ngramsTopNum, 2000)

com_max = []
# For each term, look for the most common co-occurring terms