Example #1
0
def incidentMetaData(df, dfPerKm, company, lang):
    """Build the incident summary metadata for a single company.

    Args:
        df: incident DataFrame. The columns read here are 'Company',
            'Incident Types', 'Approximate Volume Released', 'Substance',
            'What Happened' and 'Why It Happened'.
        dfPerKm: not used by this function; kept so existing callers still work.
        company: value matched exactly against the 'Company' column.
        lang: 'en' for English data; any other value is treated as French.

    Returns:
        dict with 'companyName', 'seriousEvents' (counts keyed by the English
        incident type names), 'release' and 'nonRelease' (rows with / without
        an 'Approximate Volume Released' value), further processed by the
        external ``most_common`` helper, which is called with the result keys
        'mostCommonWhat', 'mostCommonWhy' and 'mostCommonSubstance'.
    """
    # French incident type labels mapped onto the English keys of the result.
    french_to_english = {
        'Effets environnementaux négatifs': 'Adverse Environmental Effects',
        'Blessure grave (Régie ou BST)': 'Serious Injury (CER or TSB)',
        'Décès': 'Fatality',
    }

    def most_common_substance(df, meta, lang):
        # Rows carrying the "not applicable" placeholder are excluded first.
        # NOTE(review): "Sans object" may be a typo for "Sans objet" - confirm
        # against the French data before changing the literal.
        placeholder = "Not Applicable" if lang == 'en' else "Sans object"
        df_substance = df[df['Substance'] != placeholder].copy()
        return most_common(df_substance, meta, "Substance", "mostCommonSubstance")

    def other_types(df, lang):
        serious = {"Adverse Environmental Effects": 0,
                   "Fatality": 0,
                   "Serious Injury (CER or TSB)": 0}
        translate = {} if lang == 'en' else french_to_english
        for type_list in df['Incident Types']:
            if not isinstance(type_list, str):
                # Missing values (e.g. NaN) cannot be split; nothing to count.
                continue
            for t in type_list.split(","):
                t = t.strip()
                # Translate each type individually: a whole-cell replace would
                # miss French labels inside comma-separated lists.
                t = translate.get(t, t)
                if t in serious:
                    serious[t] += 1
        return serious

    def thisCompanyPct(df, df_c):
        # Currently unused: its only call site below is commented out.
        countPct = (len(df_c.index)/len(df.index))*100
        # Whole numbers once the share reaches 1%, one decimal below that.
        countPct = round(countPct, 0 if countPct >= 1 else 1)
        return {'count': countPct}

    # filter to specific company
    df_c = df[df['Company'] == company].copy()
    meta = {}
    # meta['relativePct'] = thisCompanyPct(df, df_c)
    meta['companyName'] = company
    meta['seriousEvents'] = other_types(df_c, lang)
    meta['release'] = int(df_c['Approximate Volume Released'].notnull().sum())
    meta['nonRelease'] = int(df_c['Approximate Volume Released'].isna().sum())

    meta = most_common(df_c, meta, "What Happened", "mostCommonWhat")
    meta = most_common(df_c, meta, "Why It Happened", "mostCommonWhy")
    meta = most_common_substance(df_c, meta, lang)
    return meta
Example #2
0
 def most_common_substance(df, meta, lang):
     """Run ``most_common`` on 'Substance', ignoring the not-applicable rows.

     The placeholder that is filtered out is "Not Applicable" when ``lang`` is
     'en' and "Sans object" for any other language. The result of
     ``most_common`` (called with the key "mostCommonSubstance") is returned.
     """
     placeholder = "Not Applicable" if lang == 'en' else "Sans object"
     applicable = df[df['Substance'] != placeholder].copy()
     return most_common(applicable, meta, "Substance", "mostCommonSubstance")
Example #3
0
def id3(data):
    """Recursively build a decision tree from a list of record dicts.

    Every record is a dict whose 'analito' entry is the class label; all other
    entries are attributes. The attribute with the lowest value returned by
    ``entropy`` becomes the split of the current node, and one child is built
    per distinct value of that attribute.

    The records passed in are left untouched: rounding and attribute removal
    are done on new dicts, so the caller can keep using ``data`` afterwards.

    Args:
        data: non-empty list of dicts sharing the same keys.

    Returns:
        A ``Leaf`` when the labels are pure or no attribute is left, otherwise
        a ``Node`` with one son per value of the chosen attribute.
    """
    if all(x['analito'] == 'glicose' for x in data):
        return Leaf('glicose')

    if all(x['analito'] == 'triglicerideo' for x in data):
        return Leaf('triglicerideo')

    # No attribute left to split on: fall back to the majority label.
    if not any(k != 'analito' for k in data[0]):
        return Leaf(most_common([d['analito'] for d in data]))

    # Entropy of each attribute (the label itself is not a candidate)
    splited = {k: entropy([x[k] for x in data])
               for k in data[0] if k != 'analito'}

    # Splitting attribute
    separator = min(splited.items(), key=lambda x: x[1])[0]
    node = Node(separator)

    if is_continuous(data[0][separator]):
        # Round on copies; list.copy() is shallow, so rounding the records in
        # place would also rewrite the caller's dicts.
        rounded = []
        for x in data:
            record = dict(x)
            record[separator] = round(record[separator], PRECISION)
            rounded.append(record)
        data = rounded

    for v in set(x[separator] for x in data):
        # Children see the matching records without the attribute just used.
        aux = [
            {k: val for k, val in x.items() if k != separator}
            for x in data
            if x[separator] == v
        ]
        node.add_son({v: id3(aux)})

    return node
def predict_track(clf, track):
    """Predict a single track and record the outcome on the track itself.

    Args:
        clf: classifier handed to ``predict_tracks``.
        track: dict-like track; it gains the keys 'sample_predictions',
            'prediction' and 'predictions'.

    Returns:
        The ``(prediction, predictions)`` pair returned by
        ``util.most_common`` for the per-sample predictions.
    """
    predicted = predict_tracks(clf, [track])
    track['sample_predictions'] = predicted
    prediction, predictions = util.most_common(predicted)
    track['prediction'] = prediction
    # Store the second element of the pair here, not the single prediction.
    track['predictions'] = predictions
    return prediction, predictions
Example #5
0
def cluster_n_label(cluster_count, seed_count, data, target):
    """Label every sample from a few labelled seeds using k-means clusters.

    The data is clustered with ``sklearn.cluster.KMeans``. The seed indexes
    returned by ``util.seed_numbers(len(data), seed_count)`` contribute their
    ``target`` label to the cluster they fall in, each cluster takes the
    first element of ``util.most_common`` over its seed labels, and every
    sample receives the label of its cluster.

    Args:
        cluster_count: number of k-means clusters.
        seed_count: passed to ``util.seed_numbers`` together with ``len(data)``.
        data: samples, indexable by position and accepted by ``KMeans``.
        target: labels, indexable by the same positions as ``data``.

    Returns:
        Tuple ``(data, ssl_label)`` where ``ssl_label`` lists the assigned
        label of every sample in order.
    """
    kmeans = sklearn.cluster.KMeans(n_clusters=cluster_count)
    kmeans.fit(data)

    seed_numbers = util.seed_numbers(len(data), seed_count)

    clusters_labels = [[] for _ in range(cluster_count)]
    seed_labels = []
    for seed_number in seed_numbers:
        seed_label = target[seed_number]
        # predict() returns one entry per input row; take the scalar so it can
        # be used as a list index.
        cluster = kmeans.predict([data[seed_number]])[0]
        clusters_labels[cluster].append(seed_label)
        seed_labels.append(seed_label)

    # Consensus inside each cluster; clusters that received no seed fall back
    # to the most common label among all seeds.
    major_labels = [
        util.most_common(labels)[0] if labels else util.most_common(seed_labels)[0]
        for labels in clusters_labels
    ]

    # One batch prediction instead of one predict() call per sample.
    ssl_label = [major_labels[cluster] for cluster in kmeans.predict(data)]

    return data, ssl_label
Example #6
0
def get_complain_most_company():
    """Return a ``jsonify`` response with the most common complaint company.

    Collects the ``companhia`` attribute of every ``Reclamacao`` record and
    puts the result of ``most_common`` over them under the key 'empresa'.
    """
    companies = [rec.companhia for rec in Reclamacao.objects]
    return jsonify({'empresa': most_common(companies)})
Example #7
0
def get_complain_most_locale():
    """Return a ``jsonify`` response with the most common complaint location.

    Collects the ``local`` attribute of every ``Reclamacao`` record and puts
    the result of ``most_common`` over them under the key 'local'.
    """
    places = [rec.local for rec in Reclamacao.objects]
    return jsonify({'local': most_common(places)})
Example #8
0
	def associate_measurements(self, z, C, H, threshold=None):
		"""Associate the measurements ``z`` by voting across the ensemble.

		The module-level ``associate_measurements`` function is called once per
		ensemble member (``debug=False`` is passed explicitly for the first
		member only). The per-member results fill one row each of an
		``N x len(z)`` matrix, and the value ``most_common`` returns for each
		column is the association reported for that measurement.
		"""
		n_meas = len(z)
		votes = np.zeros((self.N, n_meas))
		for member in range(self.N):
			# Only the first member passes the debug flag explicitly.
			extra = {"debug": False} if member == 0 else {}
			votes[member] = associate_measurements(
				z, self.ensemble[member], self.R, C, H, threshold=threshold, **extra)
		return [most_common(list(votes[:, col])) for col in range(n_meas)]
Example #9
0
 def multiPredict(self, X, Y, k):
     """Store a vote over the first ``k`` entries of ``self.dfResult``.

     For every position in ``range(len(Y))`` the values found at that position
     in entries 0..k-1 of the first column of ``self.dfResult`` are passed to
     ``util.most_common``; the results are saved as a numpy array in
     ``self.prediction``. ``X`` is not used.
     """
     # NOTE(review): the slice takes k + 1 rows but only the first k are read.
     top_k = self.dfResult.iloc[0:k + 1, 0]
     votes = [
         util.most_common([top_k[rank][sample] for rank in range(k)])
         for sample in range(len(Y))
     ]
     self.prediction = np.asarray(votes)
Example #10
0
def incidentMetaData(df, dfPerKm, company):
    """Assemble the incident summary metadata for one company.

    Rows whose 'Company' equals ``company`` are summarised into a dict holding
    the company name, counts of the three serious incident types, and the
    number of rows with / without an 'Approximate Volume Released' value. The
    dict is then passed through ``most_common`` for the "what common" and
    "why common" columns (whose " & "-joined results are split into lists)
    and for the applicable substances. ``dfPerKm`` is not used.
    """

    def most_common_substance(df, meta):
        # "Not Applicable" rows are left out of the substance ranking.
        applicable = df[df['Substance'] != "Not Applicable"].copy()
        return most_common(applicable, meta, "Substance", "mostCommonSubstance")

    def other_types(df):
        serious = dict.fromkeys(
            ("Adverse Environmental Effects",
             "Fatality",
             "Serious Injury (CER or TSB)"),
            0,
        )
        # Each cell is a comma-separated list of incident types.
        for cell in df['Incident Types']:
            for raw in cell.split(","):
                name = raw.strip()
                if name in serious:
                    serious[name] += 1
        return serious

    def thisCompanyPct(df, df_c):
        # Not called anywhere in this function.
        share = (len(df_c.index)/len(df.index))*100
        digits = 0 if share >= 1 else 1
        return {'count': round(share, digits)}

    # filter to specific company
    company_rows = df[df['Company'] == company].copy()
    meta = {
        'companyName': company,
        'seriousEvents': other_types(company_rows),
        'release': int(company_rows['Approximate Volume Released'].notnull().sum()),
        'nonRelease': int(company_rows['Approximate Volume Released'].isna().sum()),
    }

    meta = most_common(company_rows, meta, "what common", "mostCommonWhat")
    meta = most_common(company_rows, meta, "why common", "mostCommonWhy")

    # Turn the " & "-joined strings into lists of trimmed entries.
    for key in ("mostCommonWhat", "mostCommonWhy"):
        meta[key] = [x.strip() for x in meta[key].split(" & ")]
    return most_common_substance(company_rows, meta)
Example #11
0
    def grade_de_reconhecimento(self, lista_de_teste):
        """Build the recognition grid from a list of test items.

        Each item is indexable: element 0 is passed to ``self.reconhece2`` and
        element 1 is the value collected. Values are grouped by the first
        element of the recognition result, and every group is then reduced to
        the value ``most_common`` returns for it. The resulting dict is stored
        in ``self.rec_grid``.
        """
        desenhar = {}
        # Recognize each input and group its value under the recognized key
        for i in lista_de_teste:
            z = self.reconhece2(i[0])
            # setdefault creates the group on first use, without an except
            # clause that could hide unrelated errors.
            desenhar.setdefault(z[0], []).append(i[1])
        # Keep the most frequent value of each group
        for i in desenhar:
            desenhar[i] = most_common(desenhar[i])

        self.rec_grid = desenhar
Example #12
0
    def strategy1(self, poll_from=0):
        """Poll the chosen answer methods and return the most common answer.

        Every method in ``TriviaQuestion.CHOSEN_METHODS[poll_from:]`` is called
        with this question; answers equal to ``UNCERTAIN_ANSWER`` are dropped
        and the remainder is passed to ``util.most_common``.

        Returns:
            The result of ``util.most_common``, or 0 when polling or the vote
            raises.
        """
        # find most likely answer
        try:
            answers = [
                method(self)
                for method in TriviaQuestion.CHOSEN_METHODS[poll_from:]
            ]
            print(answers)
            answers = [
                answer for answer in answers if answer != UNCERTAIN_ANSWER
            ]
            return util.most_common(answers)
        except Exception:
            # Deliberate best-effort (e.g. every answer was uncertain): guess 0.
            # Catching Exception instead of everything lets KeyboardInterrupt
            # and SystemExit propagate.
            return 0
Example #13
0
def kfold(tracks, feature_names, folds=5, shuffle=True, **kwargs):
    """Yield the predicted test tracks of each stratified cross-validation fold.

    For every fold a new ``machine_learning.Classifier(**kwargs)`` is trained
    on the training tracks, and each test track gets the keys
    'sample_predictions', 'prediction' and 'predictions' filled in.

    Args:
        tracks: list of dict-like tracks, each with a 'label' entry.
        feature_names: passed through to the ``machine_learning`` helpers.
        folds: number of folds.
        shuffle: forwarded to ``StratifiedKFold``.
        **kwargs: forwarded to ``machine_learning.Classifier``.

    Yields:
        The list of test tracks of the current fold, after annotation.
    """
    labels = [track['label'] for track in tracks]
    # NOTE(review): StratifiedKFold(labels, n_folds=...) iterated directly looks
    # like the legacy sklearn.cross_validation API - confirm it is available.
    kf = cross_validation.StratifiedKFold(labels, n_folds=folds, shuffle=shuffle)
    for train, test in kf:
        train_tracks = [tracks[i] for i in train]
        test_tracks = [tracks[i] for i in test]
        clf = machine_learning.Classifier(**kwargs)
        clf = machine_learning.train_tracks(clf, train_tracks, feature_names)
        for track in test_tracks:
            # Only the features are needed; the true labels are not used here.
            X_test, _ = machine_learning.shape_features([track], feature_names)
            predicted = machine_learning.predict(X_test, clf)
            track['sample_predictions'] = predicted
            track['prediction'], track['predictions'] = util.most_common(predicted)
        yield test_tracks
Example #14
0
def kfold(tracks, feature_names, folds=5, shuffle=True, **kwargs):
    """Run stratified k-fold prediction, yielding each fold's test tracks.

    A classifier built from ``machine_learning.Classifier(**kwargs)`` is
    trained per fold on the training tracks. Every test track is then
    predicted on its own and annotated in place with 'sample_predictions'
    plus the ``('prediction', 'predictions')`` pair from ``util.most_common``.

    Args:
        tracks: list of dict-like tracks, each with a 'label' entry.
        feature_names: passed through to the ``machine_learning`` helpers.
        folds: number of folds.
        shuffle: forwarded to ``StratifiedKFold``.
        **kwargs: forwarded to ``machine_learning.Classifier``.

    Yields:
        The annotated test tracks of one fold at a time.
    """
    labels = [track['label'] for track in tracks]
    # NOTE(review): this call signature looks like the legacy
    # sklearn.cross_validation API - confirm it is available.
    kf = cross_validation.StratifiedKFold(labels,
                                          n_folds=folds,
                                          shuffle=shuffle)
    for train, test in kf:
        train_tracks = [tracks[i] for i in train]
        test_tracks = [tracks[i] for i in test]
        clf = machine_learning.Classifier(**kwargs)
        clf = machine_learning.train_tracks(clf, train_tracks, feature_names)
        for track in test_tracks:
            # The true labels returned alongside the features are not needed.
            X_test, _ = machine_learning.shape_features([track],
                                                        feature_names)
            predicted = machine_learning.predict(X_test, clf)
            track['sample_predictions'] = predicted
            track['prediction'], track['predictions'] = util.most_common(
                predicted)
        yield test_tracks
Example #15
0
 def testMostCommonTie(self):
     """With top=1 on "row_3", the expected "testTie" value is "4 & 8"."""
     result = most_common(self.df, {}, "row_3", "testTie", top=1)
     self.assertEqual(result["testTie"], "4 & 8")
Example #16
0
 def testMostCommonText2(self):
     """With top=2 on "row_2", "testTop2" should map 'e' to 3 and 'c' to 2."""
     result = most_common(self.df, {}, "row_2", "testTop2", top=2)
     expected = {'e': 3, 'c': 2}
     self.assertEqual(result["testTop2"], expected)
Example #17
0
 def testMostCommonNumber1(self):
     """With top=1 on "row_1", the expected "testTop1" value is the string "0"."""
     result = most_common(self.df, {}, "row_1", "testTop1", top=1)
     self.assertEqual(result["testTop1"], "0")
Example #18
0
 def testMostCommonText1(self):
     """With top=1 on "row_2", the expected "testTop1" value is "e"."""
     result = most_common(self.df, {}, "row_2", "testTop1", top=1)
     self.assertEqual(result["testTop1"], "e")
Example #19
0
 def most_common_substance(df, meta):
     """Run ``most_common`` on 'Substance' after dropping "Not Applicable" rows.

     Returns whatever ``most_common`` returns when called with ``meta`` and
     the key "mostCommonSubstance".
     """
     is_applicable = df['Substance'] != "Not Applicable"
     applicable = df[is_applicable].copy()
     return most_common(applicable, meta, "Substance", "mostCommonSubstance")
Example #20
0
def knn(k, training_data, test_element):
    """Classify ``test_element`` by a vote among its ``k`` nearest neighbours.

    Training records are ordered by ``distance(record, test_element)`` and
    the 'analito' values of the first ``k`` are passed to ``most_common``,
    whose result is returned.
    """
    # sorted() evaluates the key once per record and keeps ties in input order.
    ranked = sorted(training_data,
                    key=lambda record: distance(record, test_element))
    return most_common([record['analito'] for record in ranked[:k]])
Example #21
0
def metadata(df, company, test):
    """Summarise one company's events into a metadata dict.

    Args:
        df: DataFrame with the columns 'Event Number', 'Dig Count',
            'Nearest Populated Centre', 'Province/Territory',
            'Commencement Date' (datetime) and 'Species At Risk Present'.
            Its 'Nearest Populated Centre' column is overwritten in place
            with the trimmed text before the first comma.
        company: stored unchanged under "company".
        test: when truthy the "last full year" is fixed to 2020 instead of
            being the year before today.

    Returns:
        dict with "totalEvents", "totalDigs", "nearby" (None when no event
        commenced in the last full year, otherwise whatever ``most_common``
        stores, followed by "nearbyYear"), "atRisk" and "company".
    """
    def filter_near(city):
        # Keep only strings longer than two characters.
        return len(city) > 2

    centre_col = 'Nearest Populated Centre'
    this_company_meta = {
        "totalEvents": int(len(list(set(df['Event Number'])))),
        "totalDigs": int(df['Dig Count'].sum()),
    }

    # nearby in the last year
    # Normalise to text and keep only the part before the first comma.
    df[centre_col] = [str(x).split(",")[0].strip() for x in df[centre_col]]

    filter_list = [
        'various', 'various locations as per the attached documents.',
        'as per the attached documents', 'as per the attached document',
        'business'
    ]
    candidates = df.copy()
    is_placeholder = candidates[centre_col].str.lower().isin(filter_list)
    candidates = candidates[~is_placeholder]

    last_full_year = 2020 if test else datetime.today().year - 1
    in_last_year = candidates['Commencement Date'].dt.year == last_full_year
    candidates = candidates[in_last_year]

    if candidates.empty:
        this_company_meta["nearby"] = None
    else:
        # Sentences starting "The project is located ..." keep only the text
        # after the last "of".
        candidates[centre_col] = [
            text.split("of")[-1].strip()
            if "The project is located" in text else text
            for text in candidates[centre_col]
        ]
        for split_char in [",", "("]:
            candidates[centre_col] = [
                x.split(split_char)[0].strip() for x in candidates[centre_col]
            ]
        near_list = list(candidates[centre_col] + " " +
                         candidates['Province/Territory'].str.upper())
        near_list = [x.replace("Jasper BC", "Jasper AB") for x in near_list]
        near_list = filter(filter_near, near_list)

        most_common(pd.DataFrame(near_list), this_company_meta, 0, "nearby",
                    3, "list", False, False)
        this_company_meta["nearbyYear"] = last_full_year

    this_company_meta["atRisk"] = sum(
        1 if x == "y" else 0 for x in df['Species At Risk Present'])
    this_company_meta["company"] = company
    return this_company_meta