def save_users_timelines(twitter_api, users_ids):
    for str_id in users_ids:
        user_id = int(str_id)
        timeline = harvest_user_timeline(twitter_api, user_id=user_id)
        results = {'timeline': timeline, 'user_id': user_id}
        # print(json.dumps(results, indent=1))
        DataOperations.save_to_mongo(results, 'users_crawl', 'users_timelines')
def convertCounterToRate(timelyMetric, analyticConfig):
    # Adjust counter metric into a rate metric
    df = timelyMetric.getDataFrame()
    columnNames = df[analyticConfig.groupByColumn].unique()
    df = DataOperations.pivot(df, timelyMetric.metric, analyticConfig.groupByColumn)
    df = DataOperations.resample(df, analyticConfig.sample, how=analyticConfig.how)
    for c in columnNames:
        df[c] = df[c].diff(1) / (analyticConfig.sample_minutes * 60 * 1000)
    df = DataOperations.unpivot(df, timelyMetric.metric, analyticConfig.groupByColumn)
    df = df.dropna()
    timelyMetric.setDataFrame(df)
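# The conversion above is a first difference scaled by the sample interval
# (sample_minutes * 60 * 1000, i.e. a per-millisecond rate). A minimal
# standalone illustration of the same diff-and-divide step, using seconds
# instead of milliseconds for readability and toy counter values:
import pandas as pd

counter = pd.Series([0, 60, 180, 360], name='bytes_sent')  # cumulative counter
sample_minutes = 1                                         # one sample per minute
rate = counter.diff(1) / (sample_minutes * 60)             # per-second rate
print(rate.tolist())  # [nan, 1.0, 2.0, 3.0]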
def getPCATraingAndTesting(thresh):
    allData = loadData()
    trainingData, testingData = do.partionData(allData, .8)
    trainingX = trainingData.loc[:, features]
    trainingY = trainingData.loc[:, 'label']
    testingX = testingData.loc[:, features]
    testingY = testingData.loc[:, 'label']
    # Standardize features
    # trainingX = StandardScaler().fit_transform(trainingX)
    pca = PCA()
    # Run PCA decomposition
    principalComponents = pca.fit_transform(trainingX)
    # Compute and print the number of components that PCA will extract
    numPcaComponents = findBestPCAFeatures(pca.explained_variance_ratio_, thresh)
    print(f'Components: {numPcaComponents}')
    principalDf = pd.DataFrame(principalComponents)
    trainingX = principalDf.iloc[:, 0:numPcaComponents + 1]
    # Plot how each component affects the label
    # plotPcaComponentsAffectingY(principalDf, trainingY)
    testingX = pd.DataFrame(pca.transform(testingX))
    testingX = testingX.iloc[:, 0:numPcaComponents + 1]
    return trainingX, trainingY, testingX, testingY
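# findBestPCAFeatures is called above but not defined in this excerpt. A
# minimal sketch of what it presumably does: return the smallest number of
# components whose cumulative explained variance reaches the threshold.
# Only the call signature comes from the code above; the body is an assumption.
import numpy as np

def findBestPCAFeatures(explained_variance_ratio, thresh):
    cumulative = np.cumsum(explained_variance_ratio)
    # first index where the cumulative share reaches the threshold;
    # +1 turns the zero-based index into a component count
    return int(np.argmax(cumulative >= thresh)) + 1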
def crawl_friends(twitter_api, screen_name, limit=1000, depth=2):
    # Resolve the seed user's id from the screen name
    seed_id = str(twitter_api.users.show(screen_name=screen_name)['id'])
    next_queue = get_friends_followers_ids(twitter_api, user_id=seed_id,
                                           friends_limit=limit, followers_limit=0)
    # Store a seed_id => follower_ids mapping in MongoDB
    DataOperations.save_to_mongo({'followers': [_id for _id in next_queue]},
                                 'users_crawl', 'users_ids')
    d = 1
    while d < depth:
        d += 1
        (queue, next_queue) = (next_queue, [])
        for fid in queue:
            follower_ids = get_friends_followers_ids(twitter_api, user_id=fid,
                                                     friends_limit=limit,
                                                     followers_limit=0)
            # Store a fid => follower_ids mapping in MongoDB
            DataOperations.save_to_mongo({'followers': [_id for _id in follower_ids]},
                                         'users_crawl', 'users_ids')
            next_queue += follower_ids
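# A usage sketch, assuming an authenticated twitter_api handle and a
# placeholder screen name: harvest up to 1000 friend ids per account,
# two hops out from the seed user.
crawl_friends(twitter_api, 'example_user', limit=1000, depth=2)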
def doBestFeatureSelection(clf, numFeatures):
    multDf = pd.read_csv(os.path.dirname(os.path.abspath(__file__)) + '/data/TrainData_Labeled.csv')
    multTraining, multTesting = do.partionData(multDf, .8)
    bestFeatures = fs.getBestFeaturesForHigherOrderTerms(clf, multTraining, numFeatures, 'accuracy')
    # bestFeatures = list(['alcohol', 'volatile acidity*total sulfur dioxide*density*', 'volatile acidity*chlorides*free sulfur dioxide*pH*', 'fixed acidity*volatile acidity*free sulfur dioxide*pH*sulphates*'])
    print(bestFeatures)
    trainingData = multTraining.loc[:, bestFeatures]
    trainingY = multTraining['label']
    trainingData.insert(loc=len(trainingData.columns), column='label', value=trainingY)
    testingData = multTesting.loc[:, bestFeatures]
    testingY = multTesting['label']
    testingData.insert(loc=len(testingData.columns), column='label', value=testingY)
    print(testingData)
    do.fitTrainingData(clf, trainingData)
    do.testClassifier(clf, testingData, "Random Forests")
def graph(self, type="png"):
    graphConfig = {}
    graphConfig["title"] = DataOperations.getTitle(self.timelyMetric.metric, self.analyticConfig)
    return TimelyMetric.graph(self.analyticConfig, self.dataFrame, self.timelyMetric.metric,
                              seriesConfig=self.seriesConfig, graphConfig=graphConfig,
                              notebook=self.notebook, type=type)
def combine_users_tweets():
    timelines_cursor = get_users_timeline_from_db()
    urls_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    i = 0
    for tl in timelines_cursor:
        i += 1
        if i < 10:
            continue
        DataOperations.append_to_file("temp", str(tl['user_id']))
        for tweet in tl['timeline']:
            first_text = tweet['text']
            # drop non-ASCII characters before further cleaning
            lean_text = first_text.encode('ascii', 'ignore').decode('ascii')
            lean_text = remove_entities(lean_text, urls_pattern)
            testObj = TextProcesor.TextProcesor()
            lean_text = testObj.removeSpecialChars(lean_text)
            lean_text_arr = testObj.splitToTokens(lean_text)
            lean_text_arr = testObj.removeStopwords(lean_text_arr)
            lean_text_arr = testObj.removeSingles(lean_text_arr)
            DataOperations.append_arr_to_file("temp", lean_text_arr)
    return
def graph(self, type="png"):
    graphConfig = {}
    graphConfig["title"] = DataOperations.getTitle(self.timelyMetric, self.analyticConfig)
    return TimelyMetric.graph(self.analyticConfig, self.dataFrame, self.timelyMetric,
                              seriesConfig=self.seriesConfig, graphConfig=graphConfig,
                              notebook=self.notebook, type=type)
    'how': 'mean',
    'rolling_average_period': '12 hours',
    'min_threshold': None,
    'average_min_threshold': None,
    'max_threshold': None,
    'average_max_threshold': None,
    'min_threshold_percentage': -50,
    'max_threshold_percentage': 50,
    'boolean': 'and',
    'min_alert_period': '5 minutes',
    'last_alert': '1 hour',
    'display': 'all',
    'output_dir': '/path/to/output'
})

alert = TimelyAnalytic.find_alerts(timelyMetric, analyticConfig)
if alert is not None:
    # write graph to file
    oldmask = os.umask(0o22)
    file = alert.graph(type='html')
    os.umask(oldmask)
    text = DataOperations.getTitle(timelyMetric, analyticConfig)
    # send email with graph attached
    alert.email("", "", text, text, [file])
    # log to syslog
    alert.log(text)
def get_users_timeline_from_db():
    # match only documents whose 'timeline' array is non-empty
    uslov = {'timeline': {'$not': {'$size': 0}}}
    return DataOperations.load_from_mongo('users_crawl', 'users_timelines', True, criteria=uslov)
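# The $not/$size criteria above keeps only users with at least one harvested
# tweet. A standalone pymongo sketch of the same query, assuming a local
# MongoDB and the database/collection names used above:
from pymongo import MongoClient

client = MongoClient('localhost', 27017)
collection = client['users_crawl']['users_timelines']
# $size matches arrays of exactly the given length; wrapping it in $not
# matches every document whose 'timeline' array is not empty
non_empty = collection.find({'timeline': {'$not': {'$size': 0}}})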
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    if (analyticConfig.counter == True):
        convertCounterToRate(timelyMetric, analyticConfig)
    df = timelyMetric.getDataFrame()
    graphDF = TimelyMetric.pivot(df, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.select(lambda x: not (re.search(analyticConfig.excludeColRegex, x)), axis=1)
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.select(lambda x: re.search(analyticConfig.includeColRegex, x), axis=1)
    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF, analyticConfig.sample, how=analyticConfig.how)
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)
    combined = pandas.DataFrame()
    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)
        any_conditions_values = False
        result_values = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result_values = np.zeros(graphDF[col].shape, bool)
        any_conditions_average = False
        result_average = np.ones(graphDF_avg[col].shape, bool)
        if analyticConfig.orCondition:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(float) < analyticConfig.min_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(float) > analyticConfig.max_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.rolling_average_samples is not None:
            graphDF_avg = TimelyMetric.rolling_average(graphDF_avg, col, rolling_average=analyticConfig.rolling_average_samples)
        if analyticConfig.min_threshold_percentage is not None:
            if analyticConfig.min_threshold_percentage >= 0:
                multiple = 1.0 + (float(abs(analyticConfig.min_threshold_percentage)) / float(100))
            else:
                multiple = 1.0 - (float(abs(analyticConfig.min_threshold_percentage)) / float(100))
            currCondition = graphDF[col].astype(float) < (graphDF_avg[col].astype(float) * multiple)
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.max_threshold_percentage is not None:
            if analyticConfig.max_threshold_percentage >= 0:
                multiple = 1.0 + (float(abs(analyticConfig.max_threshold_percentage)) / float(100))
            else:
                multiple = 1.0 - (float(abs(analyticConfig.max_threshold_percentage)) / float(100))
            currCondition = graphDF[col].astype(float) > (graphDF_avg[col].astype(float) * multiple)
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.average_min_threshold is not None:
            currCondition = graphDF_avg[col].astype(float) < analyticConfig.average_min_threshold
            result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
            any_conditions_average = True
        if analyticConfig.average_max_threshold is not None:
            currCondition = graphDF_avg[col].astype(float) > analyticConfig.average_max_threshold
            result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
            any_conditions_average = True
        # if orCondition is AND and no exceptional conditions have been found,
        # then result_values will be all True
        if any_conditions_values == False:
            result_values = np.zeros(graphDF[col].shape, bool)
        exceptional_values = graphDF.loc[result_values, col]
        # if orCondition is AND and no exceptional conditions have been found,
        # then result_average will be all True
        if any_conditions_average == False:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        exceptional_average = graphDF_avg.loc[result_average, col]
        # only keep alerts that are in consecutive periods of length analyticConfig.min_alert_minutes
        exceptional_values = keepConsecutiveAlerts(graphDF, exceptional_values, analyticConfig.min_alert_minutes)
        exceptional_average = keepConsecutiveAlerts(graphDF_avg, exceptional_average, analyticConfig.min_alert_minutes)
        # only evaluate the last analyticConfig.last_alert_minutes if set
        if analyticConfig.last_alert_minutes is not None:
            end = datetime.fromtimestamp(timelyMetric.timeDateRange.getEndMs() / 1000.00, DataOperations.utc)
            recentEnoughBegin = end - timedelta(minutes=analyticConfig.last_alert_minutes)
            exceptional_values = exceptional_values.ix[recentEnoughBegin:end]
            exceptional_average = exceptional_average.ix[recentEnoughBegin:end]
        anyValueExceptions = exceptional_values.size > 0
        anyAverageExceptions = exceptional_average.size > 0
        if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions):
            combined[col] = graphDF[col]
        if analyticConfig.rolling_average_samples is not None:
            if (analyticConfig.display.lower() == "all"):
                combined[col + '_avg'] = graphDF_avg[col]
            else:
                if (anyAverageExceptions):
                    combined[col + '_avg'] = graphDF_avg[col]
                if (anyValueExceptions and (analyticConfig.min_threshold_percentage is not None or
                                            analyticConfig.max_threshold_percentage is not None)):
                    combined[col + '_avg'] = graphDF_avg[col]
        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions)):
            combined[col + '_warn'] = exceptional_values.dropna()
            seriesConfig[col + '_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }
        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyAverageExceptions)):
            combined[col + '_avg_warn'] = exceptional_average.dropna()
            seriesConfig[col + '_avg_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }
    timelyAlert = None
    if not combined.empty:
        alertAnalyticConfig = TimelyAnalyticConfiguration(analyticConfig)
        if alertAnalyticConfig.groupByColumn is None:
            alertAnalyticConfig.groupByColumn = timelyMetric.metric + "_obs"
        combined = TimelyMetric.unpivot(combined, timelyMetric.metric, groupByColumn=alertAnalyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        combined = combined.sort_values(['date', alertAnalyticConfig.groupByColumn])
        combined = combined.drop(['date'], 1)
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(combined, alertAnalyticConfig.groupByColumn)
        message = DataOperations.getTitle(timelyMetric, analyticConfig, separator=', ')
        timelyAlert = TimelyAlert(timelyMetric, combined, message, seriesConfig, alertAnalyticConfig, notebook)
    return timelyAlert
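# addCondition is referenced throughout find_alerts but not shown in this
# excerpt. Given how the masks are initialized (all True before AND-ing,
# all False before OR-ing), a minimal sketch of its assumed semantics:
def addCondition(orCondition, result, currCondition):
    # OR the new mask in for disjunctive conditions, AND it in otherwise
    if orCondition:
        return result | currCondition
    return result & currCondition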
def __iter__(self):
    for cursor in DataOperations.load_from_mongo('users_crawl', 'users_vectors', True):
        # assume there's one document per line, tokens separated by whitespace
        yield cursor['vector']
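# Because __iter__ streams one Mongo document at a time, an instance of the
# enclosing class can be passed anywhere an iterable of token lists is
# expected (e.g. a gensim-style streaming corpus) without loading the whole
# collection into memory. A usage sketch, with UserVectors as an assumed
# name for the enclosing class:
corpus = UserVectors()
for tokens in corpus:
    print(len(tokens))  # placeholder for downstream processing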
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    df = timelyMetric.getDataFrame()
    graphDF = TimelyMetric.pivot(df, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.select(lambda x: not (re.search(analyticConfig.excludeColRegex, x)), axis=1)
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.select(lambda x: re.search(analyticConfig.includeColRegex, x), axis=1)
    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF, analyticConfig.sample, how=analyticConfig.how)
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)
    combined = pandas.DataFrame()
    now = Timestamp.now()
    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)
        any_conditions_values = False
        result_values = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result_values = np.zeros(graphDF[col].shape, bool)
        any_conditions_average = False
        result_average = np.ones(graphDF_avg[col].shape, bool)
        if analyticConfig.orCondition:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(float) < analyticConfig.min_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(float) > analyticConfig.max_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True
        if analyticConfig.rolling_average_samples is not None:
            graphDF_avg = TimelyMetric.rolling_average(graphDF_avg, col, rolling_average=analyticConfig.rolling_average_samples)
        if analyticConfig.alert_percentage is not None:
            if analyticConfig.alert_percentage > 0:
                multiple = 1.0 + (float(abs(analyticConfig.alert_percentage)) / float(100))
                currCondition = graphDF[col].astype(float) > (graphDF_avg[col].astype(float) * multiple)
                result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                any_conditions_values = True
            if analyticConfig.alert_percentage < 0:
                multiple = 1.0 - (float(abs(analyticConfig.alert_percentage)) / float(100))
                if multiple > 0:
                    currCondition = graphDF[col].astype(float) < (graphDF_avg[col].astype(float) * multiple)
                    result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                    any_conditions_values = True
        if analyticConfig.average_min_threshold is not None:
            currCondition = graphDF_avg[col].astype(float) < analyticConfig.average_min_threshold
            result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
            any_conditions_average = True
        if analyticConfig.average_max_threshold is not None:
            currCondition = graphDF_avg[col].astype(float) > analyticConfig.average_max_threshold
            result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
            any_conditions_average = True
        # if orCondition is AND and no exceptional conditions have been found,
        # then result_values will be all True
        if any_conditions_values == False:
            result_values = np.zeros(graphDF[col].shape, bool)
        exceptional_values = graphDF.loc[result_values, col]
        # if orCondition is AND and no exceptional conditions have been found,
        # then result_average will be all True
        if any_conditions_average == False:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        exceptional_average = graphDF_avg.loc[result_average, col]
        # only evaluate the last analyticConfig.last_alert_minutes if set
        if analyticConfig.last_alert_minutes is not None:
            recentEnoughBegin = now - timedelta(minutes=analyticConfig.last_alert_minutes)
            exceptional_values = exceptional_values.ix[recentEnoughBegin:now]
            exceptional_average = exceptional_average.ix[recentEnoughBegin:now]
        # only keep alerts that are in consecutive periods of length analyticConfig.min_alert_minutes
        exceptional_values = keepConsecutiveAlerts(graphDF, exceptional_values, analyticConfig.min_alert_minutes)
        exceptional_average = keepConsecutiveAlerts(graphDF_avg, exceptional_average, analyticConfig.min_alert_minutes)
        anyValueExceptions = exceptional_values.size > 0
        anyAverageExceptions = exceptional_average.size > 0
        if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions):
            combined[col] = graphDF[col]
        if analyticConfig.rolling_average_samples is not None:
            if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and
                    (anyAverageExceptions or analyticConfig.alert_percentage is not None)):
                combined[col + '_avg'] = graphDF_avg[col]
        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions)):
            combined[col + '_warn'] = exceptional_values.dropna()
            seriesConfig[col + '_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }
        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyAverageExceptions)):
            combined[col + '_avg_warn'] = exceptional_average.dropna()
            seriesConfig[col + '_avg_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }
    timelyAlert = None
    if not combined.empty:
        alertAnalyticConfig = TimelyAnalyticConfiguration(analyticConfig)
        if alertAnalyticConfig.groupByColumn is None:
            alertAnalyticConfig.groupByColumn = timelyMetric.metric + "_obs"
        combined = TimelyMetric.unpivot(combined, timelyMetric.metric, groupByColumn=alertAnalyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        # sort on the group-by column actually used for the unpivot
        combined = combined.sort_values(['date', alertAnalyticConfig.groupByColumn])
        combined = combined.drop(['date'], 1)
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(combined, alertAnalyticConfig.groupByColumn)
        message = DataOperations.getTitle(timelyMetric, analyticConfig, separator=', ')
        timelyAlert = TimelyAlert(timelyMetric, combined, message, seriesConfig, alertAnalyticConfig, notebook)
    return timelyAlert
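# For intuition on the alert_percentage arithmetic above: with
# alert_percentage = 25 a point fires when its value exceeds 1.25x the
# rolling average, and with -25 when it drops below 0.75x. A toy check:
avg = 100.0
assert 130.0 > avg * (1.0 + float(abs(25)) / float(100))    # 130 > 125, fires
assert 70.0 < avg * (1.0 - float(abs(-25)) / float(100))    # 70 < 75, fires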
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    if analyticConfig.groupByColumn is None:
        return
    df = timelyMetric.getDataFrame()
    graphDF = TimelyMetric.pivot(df, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)
    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF, analyticConfig.sample, how=analyticConfig.how)
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.select(lambda x: not (re.search(analyticConfig.excludeColRegex, x)), axis=1)
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.select(lambda x: re.search(analyticConfig.includeColRegex, x), axis=1)
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)
    combined = pandas.DataFrame()
    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)
        anyConditions = False
        result = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result = np.zeros(graphDF[col].shape, bool)
        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(float) < analyticConfig.min_threshold
            result = addCondition(analyticConfig.orCondition, result, currCondition)
            anyConditions = True
        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(float) > analyticConfig.max_threshold
            result = addCondition(analyticConfig.orCondition, result, currCondition)
            anyConditions = True
        graphDF_avg = TimelyMetric.rolling_average(graphDF_avg, str(i), rolling_average=analyticConfig.rolling_average)
        if (analyticConfig.alert_percentage is not None) and (analyticConfig.rolling_average is not None):
            if analyticConfig.alert_percentage > 0:
                multiple = 1.0 + (float(abs(analyticConfig.alert_percentage)) / float(100))
                currCondition = graphDF[col].astype(float) > (graphDF_avg[col].astype(float) * multiple)
                result = addCondition(analyticConfig.orCondition, result, currCondition)
                anyConditions = True
            if analyticConfig.alert_percentage < 0:
                multiple = 1.0 - (float(abs(analyticConfig.alert_percentage)) / float(100))
                if multiple > 0:
                    currCondition = graphDF[col].astype(float) < (graphDF_avg[col].astype(float) * multiple)
                    result = addCondition(analyticConfig.orCondition, result, currCondition)
                    anyConditions = True
        if anyConditions == False:
            result = np.zeros(graphDF[col].shape, bool)
        exceptional = graphDF.loc[result, col]
        if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and exceptional.size > 0):
            combined[col] = graphDF[col]
        if ((analyticConfig.rolling_average is not None) and
                ((analyticConfig.display.lower() == "all") or
                 (analyticConfig.display.lower() == "alerts" and exceptional.size > 0 and
                  analyticConfig.alert_percentage is not None))):
            combined[col + '_avg'] = graphDF_avg[col]
        if (exceptional.size > 0):
            combined[col + '_warn'] = exceptional.dropna()
            seriesConfig[col + '_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }
    timelyAlert = None
    if not combined.empty:
        combined = TimelyMetric.unpivot(combined, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        combined = combined.sort_values(['date', analyticConfig.groupByColumn])
        combined = combined.drop(['date'], 1)
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(combined, analyticConfig.groupByColumn)
        timelyAlert = TimelyAlert(timelyMetric, combined, seriesConfig, analyticConfig, notebook)
    return timelyAlert
def get_users_friends_from_db():
    friends_ids = DataOperations.load_from_mongo('users_crawl', 'users_ids')
    return friends_ids
    'how': 'mean',
    'rolling_average_period': '12 hours',
    'min_threshold': None,
    'average_min_threshold': None,
    'max_threshold': None,
    'average_max_threshold': None,
    'alert_percentage': 25,
    'boolean': 'and',
    'min_alert_period': '5 minutes',
    'last_alert': '1 hour',
    'display': 'all',
    'output_dir': '/path/to/output'
})

alert = TimelyAnalytic.find_alerts(timelyMetric, analyticConfig)
if alert is not None:
    # write graph to file
    oldmask = os.umask(0o22)
    file = alert.graph(type='html')
    os.umask(oldmask)
    text = DataOperations.getTitle(timelyMetric, analyticConfig)
    # send email with graph attached
    alert.email("", "", text, text, [file])
    # log to syslog
    alert.log(text)
    except:
        print("Please Enter Correct Input")
    # Requesting the webpage
    uClient = uReq(my_url)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")  # using the HTML parser
    # These are the three different types of HTML containers present on Flipkart
    vertical_containers = page_soup.find_all("div", {"style": "width:25%"})
    horizontal_containers = page_soup.find_all("div", {"class": "_3pLy-c row"})
    special_type = page_soup.find_all("div", {"class": "_2B099V"})
    for n in range(1, pages + 1):
        if special_type:
            watchTypeContainer.watchContainer(special_type, query, file, n)
        elif horizontal_containers:
            horizontalContainersFlipkart.horizontalContainer(horizontal_containers, query, file, n)
        elif vertical_containers:
            verticalContainersFlipkart.verticalContainer(vertical_containers, query, file, n)
    file.close()

if __name__ == '__main__':
    main()
    DataOperations.analyzingData()
def save_single_user_timeline(twitter_api, user_id):
    uid = int(user_id)
    timeline = harvest_user_timeline(twitter_api, user_id=uid)
    results = {'timeline': timeline, 'user_id': uid}
    DataOperations.save_to_mongo(results, 'users_crawl', 'users_timelines')