예제 #1
0
def save_users_timelines(twitter_api, users_ids):
    """Harvest and persist the timeline of every user in *users_ids*.

    Each timeline is stored in MongoDB (database 'users_crawl',
    collection 'users_timelines') as {'timeline': ..., 'user_id': ...}.

    :param twitter_api: authenticated Twitter API client
    :param users_ids: iterable of user ids (numeric strings or ints)
    """
    for str_id in users_ids:
        # FIX: renamed local to avoid shadowing the builtin `id`
        user_id = int(str_id)

        timeline = harvest_user_timeline(twitter_api, user_id=user_id)
        results = {'timeline': timeline, 'user_id': user_id}
        DataOperations.save_to_mongo(results, 'users_crawl', 'users_timelines')
예제 #2
0
def convertCounterToRate(timelyMetric, analyticConfig):
    """Replace the metric's counter values with per-millisecond rates.

    Pivots the metric by the configured group-by column, resamples it,
    differences each series over one sample period, then unpivots and
    stores the result back on *timelyMetric*.
    """
    frame = timelyMetric.getDataFrame()
    groupValues = frame[analyticConfig.groupByColumn].unique()
    frame = DataOperations.pivot(frame, timelyMetric.metric, analyticConfig.groupByColumn)
    frame = DataOperations.resample(frame, analyticConfig.sample, how=analyticConfig.how)
    # sample period expressed in milliseconds: diff / period = rate
    periodMs = analyticConfig.sample_minutes * 60 * 1000
    for series in groupValues:
        frame[series] = frame[series].diff(1) / periodMs
    frame = DataOperations.unpivot(frame, timelyMetric.metric, analyticConfig.groupByColumn)
    timelyMetric.setDataFrame(frame.dropna())
예제 #3
0
def getPCATraingAndTesting(thresh):
    """Build PCA-reduced training and testing splits.

    Fits PCA on the training features, chooses how many components to keep
    via findBestPCAFeatures, and projects both splits accordingly.

    :param thresh: explained-variance threshold for findBestPCAFeatures
    :return: (trainingX, trainingY, testingX, testingY)
    """
    allData = loadData()
    trainSplit, testSplit = do.partionData(allData, .8)

    trainFeatures = trainSplit.loc[:, features]
    trainingY = trainSplit.loc[:, 'label']
    testFeatures = testSplit.loc[:, features]
    testingY = testSplit.loc[:, 'label']

    # Fit the decomposition on the training features only
    pca = PCA()
    components = pca.fit_transform(trainFeatures)

    # How many components are needed to reach the variance threshold
    numPcaComponents = findBestPCAFeatures(pca.explained_variance_ratio_, thresh)
    print(f'Components: {numPcaComponents}')

    trainingX = pd.DataFrame(components).iloc[:, 0:numPcaComponents + 1]

    # Project the held-out split with the already-fitted transform
    testingX = pd.DataFrame(pca.transform(testFeatures)).iloc[:, 0:numPcaComponents + 1]

    return trainingX, trainingY, testingX, testingY
예제 #4
0
def crawl_friends(twitter_api, screen_name, limit=1000, depth=2):
    """Breadth-first crawl of a user's friend graph, persisting each level.

    :param twitter_api: authenticated Twitter API client
    :param screen_name: seed user's screen name
    :param limit: maximum friends fetched per user
    :param depth: number of levels to expand from the seed
    """
    # Resolve the seed screen name to a user id for storage
    seed_id = str(twitter_api.users.show(screen_name=screen_name)['id'])

    next_queue = get_friends_followers_ids(twitter_api, user_id=seed_id, friends_limit=limit, followers_limit=0)
    # Store a seed_id => _follower_ids mapping in MongoDB.
    # FIX: 'users_ids'.format(seed_id) was a no-op (no placeholder in the
    # template); the format call has been dropped.
    DataOperations.save_to_mongo({'followers' : [ _id for _id in next_queue ]}, 'users_crawl', 'users_ids')
    d = 1
    while d < depth:
        d += 1
        (queue, next_queue) = (next_queue, [])
        for fid in queue:
            follower_ids = get_friends_followers_ids(twitter_api, user_id=fid, friends_limit=limit, followers_limit=0)
            # Store a fid => follower_ids mapping in MongoDB.
            # FIX: previously saved `next_queue` (the partially-built
            # accumulator) instead of this user's own follower_ids.
            DataOperations.save_to_mongo({'followers' : [ _id for _id in follower_ids ]}, 'users_crawl', 'users_ids')
            next_queue += follower_ids
예제 #5
0
def doBestFeatureSelection(clf, numFeatures):
    """Select the best higher-order features, then train and test *clf*."""
    dataPath = os.path.dirname(os.path.abspath(__file__)) + '/data/TrainData_Labeled.csv'
    multDf = pd.read_csv(dataPath)
    multTraining, multTesting = do.partionData(multDf, .8)

    bestFeatures = fs.getBestFeaturesForHigherOrderTerms(clf, multTraining, numFeatures, 'accuracy')
    print(bestFeatures)

    # Keep only the selected columns and re-append the label for fitting
    trainingData = multTraining.loc[:, bestFeatures]
    trainingY = multTraining['label']
    trainingData.insert(loc=len(trainingData.columns), column='label', value=trainingY)

    testingData = multTesting.loc[:, bestFeatures]
    testingY = multTesting['label']
    testingData.insert(loc=len(testingData.columns), column='label', value=testingY)
    print(testingData)

    do.fitTrainingData(clf, trainingData)
    do.testClassifier(clf, testingData, "Random Forests")
예제 #6
0
    def graph(self, type="png"):
        """Render this alert's data frame via TimelyMetric.graph.

        :param type: output format (e.g. "png", "html")
        :return: whatever TimelyMetric.graph produces
        """
        title = DataOperations.getTitle(self.timelyMetric.metric,
                                        self.analyticConfig)
        graphConfig = {"title": title}
        return TimelyMetric.graph(self.analyticConfig, self.dataFrame,
                                  self.timelyMetric.metric,
                                  seriesConfig=self.seriesConfig,
                                  graphConfig=graphConfig,
                                  notebook=self.notebook, type=type)
예제 #7
0
def combine_users_tweets():
    """Clean the tweets of the first eligible stored timeline and append
    the user id plus the resulting token lists to the "temp" file.

    The first nine timelines from the cursor are skipped; only the first
    remaining timeline is processed before returning.
    """
    timelines_cursor = get_users_timeline_from_db()
    urls_pattern = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    # hoisted out of the loops: one processor instance is enough
    processor = TextProcesor.TextProcesor()
    i = 0
    for tl in timelines_cursor:
        i += 1
        if i < 10:
            continue
        DataOperations.append_to_file("temp", str(tl['user_id']))
        for tweet in tl['timeline']:
            lean_text = tweet['text']

            # FIX: the encode() result was previously discarded (a no-op);
            # actually strip non-ASCII characters here.
            lean_text = lean_text.encode('ascii', 'ignore').decode('ascii')
            lean_text = remove_entities(lean_text, urls_pattern)
            lean_text = processor.removeSpecialChars(lean_text)

            lean_text_arr = processor.splitToTokens(lean_text)
            lean_text_arr = processor.removeStopwords(lean_text_arr)
            lean_text_arr = processor.removeSingles(lean_text_arr)
            DataOperations.append_arr_to_file("temp", lean_text_arr)
        # NOTE(review): intentionally stops after the first processed
        # timeline — confirm this early return is desired.
        return
예제 #8
0
    def graph(self, type="png"):
        """Render this alert via TimelyMetric.graph and return the result.

        :param type: output format (e.g. "png", "html")
        """
        title = DataOperations.getTitle(self.timelyMetric, self.analyticConfig)
        graphConfig = {"title": title}
        return TimelyMetric.graph(self.analyticConfig,
                                  self.dataFrame,
                                  self.timelyMetric,
                                  seriesConfig=self.seriesConfig,
                                  graphConfig=graphConfig,
                                  notebook=self.notebook,
                                  type=type)
예제 #9
0
    'how': 'mean',
    'rolling_average_period': '12 hours',
    'min_threshold': None,
    'average_min_threshold': None,
    'max_threshold': None,
    'average_max_threshold': None,
    'min_threshold_percentage': -50,
    'max_threshold_percentage': 50,
    'boolean': 'and',
    'min_alert_period': '5 minutes',
    'last_alert': '1 hour',
    'display': 'all',
    'output_dir': '/path/to/output'
})

alert = TimelyAnalytic.find_alerts(timelyMetric, analyticConfig)

if alert is not None:
    # write graph to file
    # FIX: `022` is Python-2-only octal syntax (SyntaxError on Python 3);
    # `0o22` parses identically on Python 2.6+ and 3.x.
    oldmask = os.umask(0o22)
    file = alert.graph(type='html')
    os.umask(oldmask)

    text = DataOperations.getTitle(timelyMetric, analyticConfig)

    # send email with graph attached
    alert.email("", "", text, text, [file])

    # log to syslog
    alert.log(text)
예제 #10
0
def get_users_timeline_from_db():
    """Load every stored timeline whose 'timeline' array is non-empty.

    :return: result of DataOperations.load_from_mongo over
             'users_crawl.users_timelines'
    """
    # $not + $size 0 keeps only documents with at least one timeline entry
    criteria = {'timeline': {'$not': {'$size': 0}}}
    return DataOperations.load_from_mongo('users_crawl', 'users_timelines', True, criteria=criteria)
예제 #11
0
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    """Evaluate threshold conditions on *timelyMetric* and build an alert.

    For each pivoted series this checks the configured absolute min/max
    thresholds, rolling-average percentage deviations, and rolling-average
    absolute thresholds.  Alert points must occur in consecutive runs of at
    least min_alert_minutes and, when last_alert_minutes is set, within
    that trailing window.

    :param timelyMetric: metric object exposing getDataFrame()/metric
    :param analyticConfig: analytic configuration object
    :param notebook: passed through to the resulting TimelyAlert
    :return: a TimelyAlert with the matched data, or None if nothing matched
    """

    if (analyticConfig.counter == True):
        # counters are cumulative; convert to a rate before thresholding
        convertCounterToRate(timelyMetric, analyticConfig)

    df = timelyMetric.getDataFrame()

    graphDF = TimelyMetric.pivot(df, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)

    # FIX: DataFrame.select() was deprecated in pandas 0.21 and later
    # removed; filter the column labels directly instead.
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if not re.search(analyticConfig.excludeColRegex, c)]]
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if re.search(analyticConfig.includeColRegex, c)]]

    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF, analyticConfig.sample, how=analyticConfig.how)

    # independent copy used for the rolling-average calculations
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)

    combined = pandas.DataFrame()

    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)

        # AND semantics start all-True and get narrowed; OR semantics start
        # all-False and get widened (see addCondition).
        any_conditions_values = False
        result_values = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result_values = np.zeros(graphDF[col].shape, bool)

        any_conditions_average = False
        result_average = np.ones(graphDF_avg[col].shape, bool)
        if analyticConfig.orCondition:
            result_average = np.zeros(graphDF_avg[col].shape, bool)

        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(float) < analyticConfig.min_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True

        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(float) > analyticConfig.max_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True

        if analyticConfig.rolling_average_samples is not None:
            graphDF_avg = TimelyMetric.rolling_average(graphDF_avg, col, rolling_average=analyticConfig.rolling_average_samples)
            if analyticConfig.min_threshold_percentage is not None:
                # positive percentage: alert below avg*(1+p); negative:
                # alert below avg*(1-|p|)
                if analyticConfig.min_threshold_percentage >= 0:
                    multiple = 1.0 + (float(abs(analyticConfig.min_threshold_percentage)) / float(100))
                else:
                    multiple = 1.0 - (float(abs(analyticConfig.min_threshold_percentage)) / float(100))
                currCondition = graphDF[col].astype(float) < (graphDF_avg[col].astype(float) * multiple)
                result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                any_conditions_values = True

            if analyticConfig.max_threshold_percentage is not None:
                if analyticConfig.max_threshold_percentage >= 0:
                    multiple = 1.0 + (float(abs(analyticConfig.max_threshold_percentage)) / float(100))
                else:
                    multiple = 1.0 - (float(abs(analyticConfig.max_threshold_percentage)) / float(100))
                currCondition = graphDF[col].astype(float) > (graphDF_avg[col].astype(float) * multiple)
                result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                any_conditions_values = True

            if analyticConfig.average_min_threshold is not None:
                currCondition = graphDF_avg[col].astype(float) < analyticConfig.average_min_threshold
                result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
                any_conditions_average = True
            if analyticConfig.average_max_threshold is not None:
                currCondition = graphDF_avg[col].astype(float) > analyticConfig.average_max_threshold
                result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
                any_conditions_average = True

        # if orCondition is AND and no exceptional conditions have been found, then result_values will be all True
        if any_conditions_values == False:
            result_values = np.zeros(graphDF[col].shape, bool)
        exceptional_values = graphDF.loc[result_values, col]

        # if orCondition is AND and no exceptional conditions have been found, then result_average will be all True
        if any_conditions_average == False:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        exceptional_average = graphDF_avg.loc[result_average, col]

        # only keep alerts that are in consecutive periods of length analyticConfig.min_alert_minutes
        exceptional_values = keepConsecutiveAlerts(graphDF, exceptional_values, analyticConfig.min_alert_minutes)
        exceptional_average = keepConsecutiveAlerts(graphDF_avg, exceptional_average, analyticConfig.min_alert_minutes)

        # only evaluate the last analyticConfig.last_alert_minutes if set
        if analyticConfig.last_alert_minutes is not None:
            end = datetime.fromtimestamp(timelyMetric.timeDateRange.getEndMs() / 1000.00, DataOperations.utc)
            recentEnoughBegin = end - timedelta(minutes=analyticConfig.last_alert_minutes)
            # FIX: Series.ix was removed from pandas; .loc performs the same
            # label-based slice on the datetime index.
            exceptional_values = exceptional_values.loc[recentEnoughBegin:end]
            exceptional_average = exceptional_average.loc[recentEnoughBegin:end]

        anyValueExceptions = exceptional_values.size > 0
        anyAverageExceptions = exceptional_average.size > 0

        if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions):
            combined[col] = graphDF[col]

        if analyticConfig.rolling_average_samples is not None:
            if (analyticConfig.display.lower() == "all"):
                combined[col + '_avg'] = graphDF_avg[col]
            else:
                if (anyAverageExceptions):
                    combined[col + '_avg'] = graphDF_avg[col]
                if (anyValueExceptions and (analyticConfig.min_threshold_percentage is not None or analyticConfig.max_threshold_percentage is not None)):
                    combined[col + '_avg'] = graphDF_avg[col]

        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions)):
            combined[col + '_warn'] = exceptional_values.dropna()

            seriesConfig[col + '_warn'] = {
                "mode" : "markers",
                "marker" : {
                    "symbol" : "hash-open",
                    "color" : "red"
                }
            }

        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyAverageExceptions)):
            combined[col + '_avg_warn'] = exceptional_average.dropna()

            seriesConfig[col + '_avg_warn'] = {
                "mode" : "markers",
                "marker" : {
                    "symbol" : "hash-open",
                    "color" : "red"
                }
            }

    timelyAlert = None
    if not combined.empty:
        alertAnalyticConfig = TimelyAnalyticConfiguration(analyticConfig)
        if alertAnalyticConfig.groupByColumn is None:
            # no group-by column: synthesize one so unpivot has a key
            alertAnalyticConfig.groupByColumn = timelyMetric.metric + "_obs"
        combined = TimelyMetric.unpivot(combined, timelyMetric.metric, groupByColumn=alertAnalyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        combined = combined.sort_values(['date', alertAnalyticConfig.groupByColumn])
        # FIX: positional axis argument to drop() was removed; use columns=.
        combined = combined.drop(columns=['date'])
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(combined, alertAnalyticConfig.groupByColumn)

        message = DataOperations.getTitle(timelyMetric, analyticConfig, separator=', ')

        timelyAlert = TimelyAlert(timelyMetric, combined, message, seriesConfig, alertAnalyticConfig, notebook)

    return timelyAlert
예제 #12
0
 def __iter__(self):
     """Yield the stored token vector of each document in
     'users_crawl.users_vectors'."""
     # one document per cursor entry; each holds a precomputed vector
     for doc in DataOperations.load_from_mongo('users_crawl', 'users_vectors', True):
         yield doc['vector']
예제 #13
0
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    """Evaluate threshold conditions on *timelyMetric* and build an alert.

    Checks absolute min/max thresholds, rolling-average percentage
    deviation (alert_percentage) and rolling-average absolute thresholds
    per series; keeps only alerts in consecutive runs of at least
    min_alert_minutes and, when set, within the last last_alert_minutes.

    :param timelyMetric: metric object exposing getDataFrame()/metric
    :param analyticConfig: analytic configuration object
    :param notebook: passed through to the resulting TimelyAlert
    :return: a TimelyAlert with the matched data, or None if nothing matched
    """

    df = timelyMetric.getDataFrame()

    graphDF = TimelyMetric.pivot(df, timelyMetric.metric, groupByColumn=analyticConfig.groupByColumn)

    # FIX: DataFrame.select() was deprecated in pandas 0.21 and later
    # removed; filter the column labels directly instead.
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if not re.search(analyticConfig.excludeColRegex, c)]]
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if re.search(analyticConfig.includeColRegex, c)]]

    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF, analyticConfig.sample, how=analyticConfig.how)

    # independent copy used for the rolling-average calculations
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)

    combined = pandas.DataFrame()

    now = Timestamp.now()

    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)

        # AND semantics start all-True and get narrowed; OR semantics start
        # all-False and get widened (see addCondition).
        any_conditions_values = False
        result_values = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result_values = np.zeros(graphDF[col].shape, bool)

        any_conditions_average = False
        result_average = np.ones(graphDF_avg[col].shape, bool)
        if analyticConfig.orCondition:
            result_average = np.zeros(graphDF_avg[col].shape, bool)

        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(float) < analyticConfig.min_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True

        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(float) > analyticConfig.max_threshold
            result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
            any_conditions_values = True

        if analyticConfig.rolling_average_samples is not None:
            graphDF_avg = TimelyMetric.rolling_average(graphDF_avg, col, rolling_average=analyticConfig.rolling_average_samples)
            if analyticConfig.alert_percentage is not None:
                # positive: alert when value exceeds avg by the percentage;
                # negative: alert when value drops below avg by it
                if analyticConfig.alert_percentage > 0:
                    multiple = 1.0 + (float(abs(analyticConfig.alert_percentage)) / float(100))
                    currCondition = graphDF[col].astype(float) > (graphDF_avg[col].astype(float) * multiple)
                    result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                    any_conditions_values = True
                if analyticConfig.alert_percentage < 0:
                    multiple = 1.0 - (float(abs(analyticConfig.alert_percentage)) / float(100))
                    if multiple > 0:
                        currCondition = graphDF[col].astype(float) < (graphDF_avg[col].astype(float) * multiple)
                        result_values = addCondition(analyticConfig.orCondition, result_values, currCondition)
                        any_conditions_values = True

            if analyticConfig.average_min_threshold is not None:
                currCondition = graphDF_avg[col].astype(float) < analyticConfig.average_min_threshold
                result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
                any_conditions_average = True
            if analyticConfig.average_max_threshold is not None:
                currCondition = graphDF_avg[col].astype(float) > analyticConfig.average_max_threshold
                result_average = addCondition(analyticConfig.orCondition, result_average, currCondition)
                any_conditions_average = True

        # if orCondition is AND and no exceptional conditions have been found, then result_values will be all True
        if any_conditions_values == False:
            result_values = np.zeros(graphDF[col].shape, bool)
        exceptional_values = graphDF.loc[result_values, col]

        # if orCondition is AND and no exceptional conditions have been found, then result_average will be all True
        if any_conditions_average == False:
            result_average = np.zeros(graphDF_avg[col].shape, bool)
        exceptional_average = graphDF_avg.loc[result_average, col]

        # only evaluate the last analyticConfig.last_alert_minutes if set
        if analyticConfig.last_alert_minutes is not None:
            recentEnoughBegin = now - timedelta(minutes=analyticConfig.last_alert_minutes)
            # FIX: Series.ix was removed from pandas; .loc performs the same
            # label-based slice on the datetime index.
            exceptional_values = exceptional_values.loc[recentEnoughBegin:now]
            exceptional_average = exceptional_average.loc[recentEnoughBegin:now]

        # only keep alerts that are in consecutive periods of length analyticConfig.min_alert_minutes
        exceptional_values = keepConsecutiveAlerts(graphDF, exceptional_values, analyticConfig.min_alert_minutes)
        exceptional_average = keepConsecutiveAlerts(graphDF_avg, exceptional_average, analyticConfig.min_alert_minutes)

        anyValueExceptions = exceptional_values.size > 0
        anyAverageExceptions = exceptional_average.size > 0

        if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions):
            combined[col] = graphDF[col]

        if analyticConfig.rolling_average_samples is not None:
            if (analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and (anyAverageExceptions or analyticConfig.alert_percentage is not None)):
                combined[col + '_avg'] = graphDF_avg[col]

        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyValueExceptions)):
            combined[col + '_warn'] = exceptional_values.dropna()

            seriesConfig[col + '_warn'] = {
                "mode" : "markers",
                "marker" : {
                    "symbol" : "hash-open",
                    "color" : "red"
                }
            }

        if ((analyticConfig.display.lower() == "all") or (analyticConfig.display.lower() == "alerts" and anyAverageExceptions)):
            combined[col + '_avg_warn'] = exceptional_average.dropna()

            seriesConfig[col + '_avg_warn'] = {
                "mode" : "markers",
                "marker" : {
                    "symbol" : "hash-open",
                    "color" : "red"
                }
            }

    timelyAlert = None
    if not combined.empty:
        alertAnalyticConfig = TimelyAnalyticConfiguration(analyticConfig)
        if alertAnalyticConfig.groupByColumn is None:
            # no group-by column: synthesize one so unpivot has a key
            alertAnalyticConfig.groupByColumn = timelyMetric.metric + "_obs"
        combined = TimelyMetric.unpivot(combined, timelyMetric.metric, groupByColumn=alertAnalyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        # FIX: sort key must be the (possibly synthesized) group-by column on
        # alertAnalyticConfig; analyticConfig.groupByColumn may be None here
        # and would raise a KeyError.
        combined = combined.sort_values(['date', alertAnalyticConfig.groupByColumn])
        # FIX: positional axis argument to drop() was removed; use columns=.
        combined = combined.drop(columns=['date'])
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(combined, alertAnalyticConfig.groupByColumn)

        message = DataOperations.getTitle(timelyMetric, analyticConfig, separator=', ')

        timelyAlert = TimelyAlert(timelyMetric, combined, message, seriesConfig, alertAnalyticConfig, notebook)

    return timelyAlert
예제 #14
0
def find_alerts(timelyMetric, analyticConfig, notebook=False):
    """Evaluate threshold conditions on *timelyMetric* and build an alert.

    Checks absolute min/max thresholds and rolling-average percentage
    deviations (alert_percentage) per pivoted series.

    :param timelyMetric: metric object exposing getDataFrame()/metric
    :param analyticConfig: analytic configuration object; must carry a
        groupByColumn, otherwise no alert is produced
    :param notebook: passed through to the resulting TimelyAlert
    :return: a TimelyAlert with the matched data, or None
    """

    if analyticConfig.groupByColumn is None:
        # nothing to pivot on — bail out
        return

    df = timelyMetric.getDataFrame()

    graphDF = TimelyMetric.pivot(df,
                                 timelyMetric.metric,
                                 groupByColumn=analyticConfig.groupByColumn)

    if analyticConfig.sample is not None:
        graphDF = TimelyMetric.resample(graphDF,
                                        analyticConfig.sample,
                                        how=analyticConfig.how)

    # FIX: DataFrame.select() was deprecated in pandas 0.21 and later
    # removed; filter the column labels directly instead.
    if analyticConfig.excludeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if not re.search(analyticConfig.excludeColRegex, c)]]
    if analyticConfig.includeColRegex is not None:
        graphDF = graphDF.loc[:, [c for c in graphDF.columns
                                  if re.search(analyticConfig.includeColRegex, c)]]

    # independent copy used for the rolling-average calculations
    graphDF_avg = pandas.DataFrame(graphDF, copy=True)

    combined = pandas.DataFrame()

    seriesConfig = {}
    for i in graphDF_avg.columns:
        col = str(i)

        # AND semantics start all-True and get narrowed; OR semantics start
        # all-False and get widened (see addCondition).
        anyConditions = False
        result = np.ones(graphDF[col].shape, bool)
        if analyticConfig.orCondition:
            result = np.zeros(graphDF[col].shape, bool)

        if analyticConfig.min_threshold is not None:
            currCondition = graphDF[col].astype(
                float) < analyticConfig.min_threshold
            result = addCondition(analyticConfig.orCondition, result,
                                  currCondition)
            anyConditions = True

        if analyticConfig.max_threshold is not None:
            currCondition = graphDF[col].astype(
                float) > analyticConfig.max_threshold
            result = addCondition(analyticConfig.orCondition, result,
                                  currCondition)
            anyConditions = True

        graphDF_avg = TimelyMetric.rolling_average(
            graphDF_avg,
            str(i),
            rolling_average=analyticConfig.rolling_average)
        if (analyticConfig.alert_percentage
                is not None) and (analyticConfig.rolling_average is not None):
            # positive: alert when value exceeds avg by the percentage;
            # negative: alert when value drops below avg by it
            if analyticConfig.alert_percentage > 0:
                multiple = 1.0 + (float(abs(analyticConfig.alert_percentage)) /
                                  float(100))
                currCondition = graphDF[col].astype(float) > (
                    graphDF_avg[col].astype(float) * multiple)
                result = addCondition(analyticConfig.orCondition, result,
                                      currCondition)
                anyConditions = True
            if analyticConfig.alert_percentage < 0:
                multiple = 1.0 - (float(abs(analyticConfig.alert_percentage)) /
                                  float(100))
                if multiple > 0:
                    currCondition = graphDF[col].astype(float) < (
                        graphDF_avg[col].astype(float) * multiple)
                    result = addCondition(analyticConfig.orCondition, result,
                                          currCondition)
                    anyConditions = True

        # with AND semantics and no conditions, result would be all True;
        # treat "no conditions" as "no alerts" instead
        if anyConditions == False:
            result = np.zeros(graphDF[col].shape, bool)

        exceptional = graphDF.loc[result, col]

        if (analyticConfig.display.lower()
                == "all") or (analyticConfig.display.lower() == "alerts"
                              and exceptional.size > 0):
            combined[col] = graphDF[col]

        if ((analyticConfig.rolling_average is not None)
                and ((analyticConfig.display.lower() == "all") or
                     (analyticConfig.display.lower() == "alerts"
                      and exceptional.size > 0
                      and analyticConfig.alert_percentage is not None))):
            combined[col + '_avg'] = graphDF_avg[col]

        if (exceptional.size > 0):
            combined[col + '_warn'] = exceptional.dropna()
            seriesConfig[col + '_warn'] = {
                "mode": "markers",
                "marker": {
                    "symbol": "hash-open",
                    "color": "red"
                }
            }

    timelyAlert = None
    if not combined.empty:
        combined = TimelyMetric.unpivot(
            combined,
            timelyMetric.metric,
            groupByColumn=analyticConfig.groupByColumn)
        combined = combined.sort_index()
        combined['date'] = combined.index.values
        combined = combined.sort_values(['date', analyticConfig.groupByColumn])
        # FIX: positional axis argument to drop() was removed; use columns=.
        combined = combined.drop(columns=['date'])
        combined = combined.dropna()
        combined = DataOperations.ensureMinSeriesLength(
            combined, analyticConfig.groupByColumn)

        timelyAlert = TimelyAlert(timelyMetric, combined, seriesConfig,
                                  analyticConfig, notebook)

    return timelyAlert
예제 #15
0
def get_users_friends_from_db():
    """Return the stored friend/follower id documents.

    :return: result of loading 'users_crawl.users_ids' from MongoDB
    """
    return DataOperations.load_from_mongo('users_crawl', 'users_ids')
예제 #16
0
    'how' : 'mean',
    'rolling_average_period' : '12 hours',
    'min_threshold' : None,
    'average_min_threshold' : None,
    'max_threshold' : None,
    'average_max_threshold' : None,
    'alert_percentage' : 25,
    'boolean' : 'and',
    'min_alert_period' : '5 minutes',
    'last_alert' : '1 hour',
    'display' : 'all',
    'output_dir' : '/path/to/output'
})

alert = TimelyAnalytic.find_alerts(timelyMetric, analyticConfig)

if alert is not None:
    # write graph to file
    # FIX: `022` is Python-2-only octal syntax (SyntaxError on Python 3);
    # `0o22` parses identically on Python 2.6+ and 3.x.
    oldmask = os.umask(0o22)
    file = alert.graph(type='html')
    os.umask(oldmask)

    text = DataOperations.getTitle(timelyMetric, analyticConfig)

    # send email with graph attached
    alert.email("", "", text, text, [file])

    # log to syslog
    alert.log(text)

예제 #17
0
        except:
            print("Please Enter Correct Input")

    #Requesting Webpage
    uClient = uReq(my_url)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")  # Using HTML Parser

    # These Are the Three Different Type Of Html Containers present on Flipkart
    vertical_containers = page_soup.find_all("div", {"style": "width:25%"})
    horizontal_containers = page_soup.find_all("div", {"class": "_3pLy-c row"})
    special_type = page_soup.find_all("div", {"class": "_2B099V"})

    for n in range(1, pages + 1):
        if special_type:
            watchTypeContainer.watchContainer(special_type, query, file, n)
        elif horizontal_containers:
            horizontalContainersFlipkart.horizontalContainer(
                horizontal_containers, query, file, n)
        elif vertical_containers:
            verticalContainersFlipkart.verticalContainer(
                vertical_containers, query, file, n)
    file.close()


# Script entry point.
if __name__ == '__main__':
    main()

# NOTE(review): this call runs at import time, outside the __main__ guard —
# confirm the module-level side effect is intended.
DataOperations.analyzingData()
예제 #18
0
def save_single_user_timeline(twitter_api, user_id):
    """Harvest one user's timeline and persist it to MongoDB
    ('users_crawl' db, 'users_timelines' collection).

    :param twitter_api: authenticated Twitter API client
    :param user_id: user id (numeric string or int)
    """
    # FIX: renamed local to avoid shadowing the builtin `id`
    uid = int(user_id)
    timeline = harvest_user_timeline(twitter_api, user_id=uid)
    results = {'timeline': timeline, 'user_id': uid}
    DataOperations.save_to_mongo(results, 'users_crawl', 'users_timelines')