# Standard-library names used below (`re.compile`, `time()`); importing them
# here keeps this script runnable even if no earlier import provides them.
import re
from time import time

from Request import Request

# NOTE(review): `Filter` is used below but its import is not visible in this
# part of the file - presumably a project-local module; confirm it is imported.

# Create request object to handle user input.
q = Request()

# Month abbreviation -> month number lookup.
months = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

# q.date is split on whitespace; the first token is taken as the month and the
# second as the day (presumably "Mon DD", matching the keys of `months` -
# TODO confirm against Request). Fewer than two tokens raises IndexError.
date = q.date.split()
month = date[0]
day = date[1]

# connect to Mongo and search based on criteria
q.connect()
criteria = {
    "lang": "en",
    # Match documents whose created_at string contains the requested date.
    "created_at": {'$regex': q.date},
    # The pattern is unanchored, so any text containing "RT" anywhere is
    # excluded, not only texts that start with it.
    "text": {"$not": re.compile("RT")},
}
# The second argument restricts the returned fields to "text".
cursor = q.search(criteria, {"text": 1})

# load tweet with id
corpus = []  # whitespace-normalised tweet texts
ids = []     # document "_id" values, index-aligned with `corpus`
tweet_filter = Filter(25)
for document in cursor:
    # Collapse every run of whitespace into a single space.
    # NOTE(review): on Python 3 `.encode("utf-8").split()` yields bytes and
    # `' '.join(...)` raises TypeError; this line only works on Python 2.
    # Confirm the target interpreter before changing it.
    text = ' '.join(document["text"].encode("utf-8").split())
    corpus.append(text)
    ids.append(document["_id"])

# filter repeated tweets
t0 = time()  # start timestamp for the filtering phase
i = 0
status = -1
unique_tweets = ["Dummy Tweet"]  # seeded with a placeholder entry
return json.JSONEncoder.default(self, o) # Create request object to handle user input. q = Request() months = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12} date = q.date.split() month = date[0] day = date[1] # connect to Mongo and search based on criteria q.connect() criteria = {"lang": "en", "created_at": {'$regex': q.date}, "text": {"$not": re.compile("RT")}} cursor = q.search(criteria).limit(1000) # Open file I/O streams directory = os.path.dirname(os.getcwd()) fn = "sample_" + str(months[month]) + "_" + str(day) + ".json" f = open(directory + "/data/" + fn, "w+") # load tweet with id corpus = [{"text": "dummy"}] tweetFilter = Filter(45) i = 0 print("Filtering Results...") for document in cursor: document["_id"] = str(document["_id"]) document["text"] = document["text"].replace('"', "'") for tweet in corpus: