Example #1
def ultimate_evaluate(model):
    genres = ['action', 'drama', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_histogram_test")
            genreFeatures = np.array([np.array(f)
                                      for f in genreFeatures])  # numpy hack
        except Exception as e:
            print(e)
            return
        print("OK.")
        for videoFeatures in genreFeatures:
            total[genreIndex] += 1
            d = defaultdict(int)
            predictedClasses = model.predict(
                videoFeatures)  # List of predictions, per-frame
            print(predictedClasses)
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.items(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1

    print(correct, total)

    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print(confusionMatrix)
Example #2
    def __init__(self, u_dict, i_dict, user_list, item_list, k):
        self.user_dict = u_dict
        self.item_dict = i_dict
        self.user_list = user_list
        self.item_list = item_list
        self.alpha = 4.2
        self.b_u = defaultdict.fromkeys(user_list, 0.1)
        self.b_i = defaultdict.fromkeys(item_list, 0.1)
        self.b_u.default_factory = float
        self.b_i.default_factory = float
        self.g_u = self.construct_g_u(k)
        self.g_i = self.construct_g_i(k)
        self.g_u.default_factory = lambda: np.zeros(k)
        self.g_i.default_factory = lambda: np.zeros(k)
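A note on the pattern above: defaultdict inherits fromkeys() from dict, so the dictionary it returns has no default_factory until one is assigned, which is why b_u, b_i, g_u and g_i set it explicitly. A minimal sketch of that behavior (the names are illustrative):

from collections import defaultdict

d = defaultdict.fromkeys(['alice', 'bob'], 0.1)
print(d.default_factory)   # None -- a missing key would raise KeyError
d.default_factory = float  # now missing keys yield float() == 0.0
print(d['carol'])          # 0.0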
Example #3
def _update_features(id):
    try:
        webapp = Webapp.objects.get(pk=id)
    except Webapp.DoesNotExist:
        _log(id, u'Webapp does not exist')
        return

    # We only detect features on packaged webapps.
    if not webapp.is_packaged:
        _log(id, u'Webapp is not a packaged app')
        return

    # If the app doesn't have a current_version, don't bother either.
    if not webapp.current_version:
        _log(id, u'Webapp does not have a current_version')
        return

    # If the app already has a non-empty feature profile, don't touch it.
    features = webapp.current_version.features
    if features.to_list():
        _log(id, u'Webapp already has a non-empty feature profile')
        return

    version = webapp.current_version
    res = run_validator(version.all_files[0].file_path)
    validation_result = json.loads(res)

    # Set all detected features as True and save them.
    feature_profile = validation_result['feature_profile']
    keys = ['has_%s' % feature.lower() for feature in feature_profile]
    data = defaultdict.fromkeys(keys, True)

    # Update features.
    features.update(**data)
Example #4
def extract_nsc(text_body):
    first_pass_regex_match = [
        rtn_regex_grp(p, text_body) for p in nsc_prefix_lst_short
    ]
    # In a character class '|' is literal, so [H|L|...] would also match
    # '|'; list the prefix characters directly.
    second_pass_regex_match = re.findall(
        r'([HLJCNQFM0][0-9]{2,}\w+)', text_body)
    cmb_regex_matches = list(
        set(second_pass_regex_match + [m
                                       for m in first_pass_regex_match if m]))
    try:
        vouch_nsc = [
            r for r in cmb_regex_matches if len(r) < 10 and len(r) > 4
        ]
    except:
        vouch_nsc = ["Didn't find any NSCs"]
    print(f'NSCs: {vouch_nsc}')
    if len(cmb_regex_matches) > 1:
        vouch_nsc = nsc_exclude(vouch_nsc)
        if not vouch_nsc:
            # if after removing CH formulas and the list becomes empty
            vouch_nsc = ["Didn't find any NSCs"]
        vouch_nsc = list(defaultdict.fromkeys(vouch_nsc).keys())
    else:
        if not vouch_nsc:
            vouch_nsc = ["Didn't find any NSCs"]
    # filter the nscs by values that have all uppercase
    vouch_nsc = list(
        filter(lambda x: not any([c.islower() for c in x]), vouch_nsc))
    return vouch_nsc
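The list(defaultdict.fromkeys(vouch_nsc).keys()) call above is an order-preserving dedupe: fromkeys keeps only the first occurrence of each key, and dicts preserve insertion order (Python 3.7+). A minimal sketch with made-up codes; plain dict.fromkeys behaves the same here since no default_factory is involved:

codes = ['N1234', 'J5678', 'N1234', 'Q9012']
print(list(dict.fromkeys(codes)))  # ['N1234', 'J5678', 'Q9012']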
Example #6
    def get_data_and_recipes(self):
        #### Getting the data
        recipes_df = pd.read_csv(r'./data/full_dataframe.csv',
                                 na_values=['< 1']).fillna(0)
        recipes_df['ingredients'] = recipes_df.ingredients.apply(literal_eval)
        recipes_df['recipe_name'] = recipes_df['recipe_name'].astype(str)
        recipes_df = recipes_df[recipes_df['total_time'] != 0]
        recipes_df = recipes_df.drop_duplicates(subset=['recipe_name'])
        print("total number of recipes:", len(recipes_df))

        ####Initialize dictionary with all of the recipe names and a proportion of 0
        recipe_list = defaultdict.fromkeys(recipes_df['recipe_name'].to_list(),
                                           0)

        ####Initialize dictionary
        # all_ingredients = defaultdict(int)

        # ####Populate dictionary of all ingredients in recipe csv
        # for ind in recipes_df.index:
        #     temp_list = recipes_df['ingredients'][ind]
        #     for item in temp_list:
        #         all_ingredients[str(item)] += 1

        # print("\nUser ingredients: ", user_ingred_list)

        # print("\nPlease wait...\n")

        ### Filtering recipe strictly for ingredients
        n = len(self.user_ingred_list)
        # if self.strict == "n":
        #     recipes_df = recipes_df[recipes_df["n_ingredients"] <= n]
        #     pass

        ####Filtering: proportions and banned ingredients
        for index, row in recipes_df.iterrows():
            ingre_list = row['ingredients'].copy()
            nn = len(ingre_list)
            if self.strict == "y":
                nn = len(
                    ingre_list
                ) * 0.001  # the priority is: the ingredients in recipe are in user's ingredients --> Minimalist list of ingredients
            else:
                nn = n  # the priority is: the user's ingredients are in recipe's ingredients -->  Long list of ingredients
            for user_rec in self.user_ingred_list:
                r = re.compile(r".*\b(" + user_rec + ")\\b",
                               flags=re.IGNORECASE)
                match = list(filter(r.match, ingre_list))
                if match:
                    if self.user_banned_ingre != []:
                        for user_ban in self.user_banned_ingre:
                            r_1 = re.compile(r".*\b(" + user_ban + ")\\b",
                                             flags=re.IGNORECASE)
                            match_1 = list(filter(r_1.match, ingre_list))
                            if not match_1:
                                recipe_list[row['recipe_name']] += 1 / nn
                                # ingre_list.remove(match[0])
                    else:
                        recipe_list[row['recipe_name']] += 1 / nn
                        # ingre_list.remove(match[0])
        return recipe_list, recipes_df
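The word-boundary regex built from user input above assumes ingredient names contain no regex metacharacters. A hedged sketch of the same whole-word match with re.escape added as a guard (matches_ingredient is an illustrative helper, not part of the original class):

import re

def matches_ingredient(user_item, ingredient_line):
    # Whole-word, case-insensitive match; re.escape guards against
    # metacharacters in user input (an added safety assumption).
    pattern = re.compile(r".*\b(" + re.escape(user_item) + r")\b",
                         flags=re.IGNORECASE)
    return bool(pattern.match(ingredient_line))

print(matches_ingredient("egg", "1 Egg, beaten"))  # True
print(matches_ingredient("egg", "eggplant"))       # False (word boundary)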
Example #7
def _ngram_values(string_list, n, readjust_zero_scores=True):
    '''Given the corpus of strings in 'string_list', computes n-gram
    statistics across the corpus.  Returns the results as a dictionary
    containing all possible n-grams, where the dictionary keys are the
    n-grams as strings (e.g., 'aa', 'ab', 'ac', ...) and the dictionary
    values are NGramData named tuples.  The numeric values
    inside the NGramData reflect the frequency statistics for that n-gram
    across the whole corpus.

    The optional argument 'readjust_zero_scores' governs what happens to the
    IDF values assigned to n-grams that do not appear in the corpus at all.
    If readjust_zero_scores = False, nothing is done, and the values are left
    at 0.  If readjust_zero_scores = True, the value is set equal to the
    highest IDF value found across the 'string_list' corpus.  (In our
    application, values of 0 in this situation are *not* desirable.  In IDF
    terms, a lower value indicates a more frequently-seen n-gram, whereas in
    our application, we look for uncommon n-grams and thus we want never-seen
    n-grams to have a *high* value.  This *could* be handled by detecting
    them when computing string scores, but that simply introduces needless
    repeated if-then tests in the step of computing scores for strings.  It
    is more efficient to store the desired value.  This is the reason the
    default is readjust_zero_scores = True.  Note that it is still possible
    to determine that a given n-gram does not appear in the corpus simply by
    looking at the string_frequency field of the NGramData tuple for that
    n-gram, so we do not really lose any information by doing this.)
    '''
    counts = defaultdict(int)
    occurrences = defaultdict(set)
    num_strings = 0
    for s in string_list:
        s = s.lower()
        num_strings += 1
        for ngram in ngrams(s, n):
            occurrences[ngram].add(s)
            counts[ngram] += 1
    # Set initial values for all n-grams.
    all_ngrams = defaultdict.fromkeys(
        _all_possible_ngrams(n),
        NGramData(string_frequency=0, total_frequency=0, idf=0))
    # Set n-gram values based on occurrences in the corpus.
    max_frequency = max([count for ngram, count in counts.items()])
    for ngram, ngram_strings in occurrences.items():
        string_freq = len(ngram_strings)
        total_freq = counts[ngram]
        score = _ngram_idf_value(num_strings, string_freq, total_freq,
                                 max_frequency)
        all_ngrams[ngram] = NGramData(string_frequency=string_freq,
                                      total_frequency=total_freq,
                                      idf=score)
    # Now that we've seen all n-grams actually present in the corpus, go back
    # and set those that have 0 values to a very high value (=> rare n-gram).
    if readjust_zero_scores:
        max_idf = ceil(_highest_idf(all_ngrams))
        for ngram, value in all_ngrams.items():
            if value.idf == 0:
                # Can't set a value in an existing tuple; must regenerate tuple
                all_ngrams[ngram] = NGramData(string_frequency=0,
                                              total_frequency=0,
                                              idf=max_idf)
    return all_ngrams
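As the docstring notes, an n-gram absent from the corpus stays identifiable after the readjustment through its string_frequency field. A hedged usage sketch (corpus_strings stands in for a real list of strings):

values = _ngram_values(corpus_strings, 2)
unseen = [g for g, v in values.items() if v.string_frequency == 0]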
Example #8
def extract_with_lat_lon():

  file_name = '../data/fifi_data.xlsx'
  required_columns = ['Service Request Number', 'Created Date', 'Location', 'Location Details', 'Description']

  xls = pd.ExcelFile(file_name)
  fifi_dict = defaultdict.fromkeys(xls.sheet_names)

  # sheet_to_df_map = pd.read_excel(file_name, sheet_name=xls.sheet_names, usecols=required_columns, parse_dates=True)

  for name in fifi_dict.keys():
    fifi_dict[name] = pd.read_excel(xls, name, usecols=required_columns)
  # Assign the sheet name/key as category for each dataframe, so that the dataframes can be distinguished by category post merge.

  for name, value in fifi_dict.items():
    fifi_dict[name]['Category'] = name

  df = pd.concat(fifi_dict, ignore_index=True)

  df['location_latitude'] = df['Location Details'].str.extract("LatLng: (.*),.*$", expand=True)
  df['location_longitude'] = df['Location Details'].str.extract("LatLng: .*, (.*)$", expand=True)

  df['location_X'] = df['Location Details'].str.extract("XY: (.*),.*LatLng:.*$", expand=True)
  df['location_Y'] = df['Location Details'].str.extract("XY: .*,(.*); LatLng:.*$", expand=True)

  # df = pd.read_csv('fifi_cleaned.csv', parse_dates=True)

  df['zipcode'] = df.apply(lambda x: get_zipcode(str(x.location_latitude).strip(), str(x.location_longitude).strip()),
                           axis=1)

  df.to_csv('../data/fifi_cleaned.csv')
Example #9
def extract_nsc(text_body):
    # vouch_nsc = re.findall(r'(^[JCNQFM]{1}\w{1,}\d{2,})', text_body)
    # vouch_nsc = re.findall(r'([^ABD-IK-LO-PR-Zabd-ik-lo-pr-z]\w+\d{2,})', text_body)
    first_pass_regex_match = [
        rtn_regex_grp(p, text_body) for p in nsc_prefix_lst_short
    ]
    # In a character class '|' is literal, so list the prefixes directly.
    second_pass_regex_match = re.findall(r'([JCNQFM0]\d{2,}\w+)',
                                         text_body)
    cmb_regex_matches = list(
        set(second_pass_regex_match + [m
                                       for m in first_pass_regex_match if m]))
    try:
        vouch_nsc = [
            r for r in cmb_regex_matches if len(r) < 10 and len(r) > 4
        ]
    except:
        vouch_nsc = ["Didn't find any NSCs"]
    print(f'NSCs: {vouch_nsc}')
    if len(cmb_regex_matches) > 1:
        vouch_nsc = nsc_exclude(vouch_nsc)
        if not vouch_nsc:
            # if after removing CH formulas and the list becomes empty
            vouch_nsc = ["Didn't find any NSCs"]
        vouch_nsc = list(defaultdict.fromkeys(vouch_nsc).keys())
    else:
        if not vouch_nsc:
            vouch_nsc = ["Didn't find any NSCs"]
    # keep only NSCs with more than 4 characters
    vouch_nsc = list(filter(lambda x: len(x) > 4, vouch_nsc))
    return vouch_nsc
Example #10
def _update_features(id):
    try:
        webapp = Webapp.objects.get(pk=id)
    except Webapp.DoesNotExist:
        _log(id, u"Webapp does not exist")
        return

    # We only detect features on packaged webapps.
    if not webapp.is_packaged:
        _log(id, u"Webapp is not a packaged app")
        return

    # If the app doesn't have a current_version, don't bother either.
    if not webapp.current_version:
        _log(id, u"Webapp does not have a current_version")
        return

    # If the app already has a feature profile, don't touch it.
    if AppFeatures.objects.filter(version=webapp.current_version).exists():
        _log(id, u"Webapp already has a feature profile")
        return

    version = webapp.current_version
    res = run_validator(version.all_files[0].file_path)
    validation_result = json.loads(res)

    # Set all detected features as True and save them.
    feature_profile = validation_result["feature_profile"]
    keys = ["has_%s" % feature.lower() for feature in feature_profile]
    data = defaultdict.fromkeys(keys, True)
    AppFeatures.objects.create(version=version, **data)
Example #11
    def export(self, file_format="json", directory=None, depth_limit=None):
        if self.task["format"] is False:
            self.format = file_format
        else:
            self.format = self.task["format"]
        if self.format not in ["json", "csv", "sql", "mongodb", "mongo"]:
            sys.exit("Wrong export format")

        if depth_limit is not None:
            completed_depth = depth_limit
        else:
            try:
                completed_depth = int(self.current_level() - 1)
            except IndexError:
                completed_depth = 2
        if directory is None:
            directory = os.path.join(RESULT_PATH, self.project)
        results_fields = defaultdict.fromkeys(
            [u'cited_domains', u'extension', u'title', u'url', u'source_url',
             u'date', u'depth', u'url_id', u'cited_links',
             u'cited_links_ids', u'crawl_nb'], 1)
        for n in self.data.find({"status.0": True}, {"_id": 1}):
            # n is a plain dict from the cursor; fetch the projected fields.
            print(self.data.find_one({"_id": n["_id"]}, results_fields))
            break
        #query_str = '{last_status:true, depth:{$lte:%i}}, {\"_id\": 0, \"last_cited_links_ids\":1, \"last_title\":1, \"last_text\":1, \"last_status\":1, \"last_date\":1, \"depth\":1, \"url\":1, \"url_id\":1}' %completed_depth
        #query, projection = {"last_status":True,"depth":{"$lte":completed_depth}},{"_id": 0, "last_cited_links_ids":1, "last_title":1, "last_text":1, "last_status":1, "last_date":1, "depth":1, "url":1, "url_id":1}

        outfile = os.path.join(
            directory,
            "results_export" + self.date.strftime("%d%m%Y_%H-%M") +
            "." + self.format)
Example #12
    def _changed_features(self):
        old_features = defaultdict.fromkeys(self.initial_features, True)
        old_features = set(unicode(f) for f in AppFeatures(**old_features).to_list())
        new_features = set(unicode(f) for f in self.instance.to_list())

        added_features = new_features - old_features
        removed_features = old_features - new_features
        return added_features, removed_features
Example #13
def find_kmers(orientations, k, alphabet=['>', '<']):
    orientations = np.array(list(orientations))
    patterns = defaultdict.fromkeys(
        list(map(lambda x: "".join(x), itertools.product(alphabet, repeat=k))), 0)

    for i in range(orientations.shape[0] - k + 1):
        current_window = "".join(orientations[i: i + k])
        patterns[current_window] = patterns[current_window] + 1
    return dict(patterns)
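A quick usage sketch for the function above (it relies on numpy and itertools), counting every length-2 pattern over a short orientation string:

print(find_kmers("><>><", 2))
# {'>>': 1, '><': 2, '<>': 1, '<<': 0}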
Example #14
    def __init__(self, input_data: dict, batch_sizes: list):
        self.input_data = input_data
        self.sizes_map = {}
        for name, tensors in input_data.items():
            self.sizes_map[name] = len(tensors)
        self.index_map = defaultdict.fromkeys(input_data.keys(), 0)
        self.batch_sizes = batch_sizes
        self.size = len(batch_sizes)
        self.current_group_id = 0
Example #15
    def _changed_features(self):
        old_features = defaultdict.fromkeys(self.initial_features, True)
        old_features = set(
            unicode(f) for f in AppFeatures(**old_features).to_list())
        new_features = set(unicode(f) for f in self.instance.to_list())

        added_features = new_features - old_features
        removed_features = old_features - new_features
        return added_features, removed_features
Example #16
def gmnb_ngram_factors(real_strings, nonsense_strings, n, theta=0):
    # This actually stores the logarithm of the probabilities, because that's
    # the quantity used when applying the Bayes formula and it's more efficient
    # to precompute the log than to have to compute it over and over again.

    occurrences_real     = defaultdict(int)
    occurrences_nonsense = defaultdict(int)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_real[ngram] += 1
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram] += 1

    real_sum = sum(occurrences_real.values())
    nonsense_sum = sum(occurrences_nonsense.values())

    all_ngrams = all_possible_ngrams(n)
    num_ngrams = len(all_ngrams)
    num_real_strings = len(real_strings)
    num_nonsense_strings = len(nonsense_strings)

    # In this generalized version of tf-idf, if the tf for a given document
    # is 0, then the whole term is 0.  So our default is simple:
    # weights = defaultdict.fromkeys(all_ngrams, NGramFactor(real_term=0, nonsense_term=0))

    missing_real = log(relative_frequency(0, real_sum, num_ngrams, 1))
    missing_nonsense = log(relative_frequency(0, nonsense_sum, num_ngrams, 1))
    weights = defaultdict.fromkeys(all_ngrams,
                                   NGramFactor(real_term=missing_real,
                                               nonsense_term=missing_nonsense))

    # We store everything that is constant for a given n-gram.
    # theta_over_N = theta/num_ngrams
    # for ngram in all_ngrams:
    #     real_term = missing_real
    #     nonsense_term = missing_nonsense
    #     if occurrences_real[ngram] > 0:
    #         idf = log(num_real_strings/occurrences_real[ngram])
    #         real = relative_frequency(occurrences_real[ngram], real_sum, num_ngrams)
    #         real_term = idf*log((1 - theta)*real + theta_over_N)
    #     if occurrences_nonsense[ngram] > 0:
    #         idf = log(num_nonsense_strings/occurrences_nonsense[ngram])
    #         nonsense = relative_frequency(occurrences_nonsense[ngram], nonsense_sum, num_ngrams)
    #         nonsense_term = idf*log((1 - theta)*nonsense + theta_over_N)
    #     weights[ngram] = NGramFactor(real_term=real_term, nonsense_term=nonsense_term)

    theta_over_N = theta/num_ngrams
    for ngram in all_ngrams:
        real = relative_frequency(occurrences_real[ngram], real_sum, num_ngrams)
        real_term = log((1 - theta)*real + theta_over_N)
        nonsense = relative_frequency(occurrences_nonsense[ngram], nonsense_sum, num_ngrams)
        nonsense_term = log((1 - theta)*nonsense + theta_over_N)
        weights[ngram] = NGramFactor(real_term=real_term, nonsense_term=nonsense_term)

    return weights
Example #17
    def largest_connected_component(self):
        visited = defaultdict.fromkeys(self.adj.keys(), False)
        largest = 1
        for key in visited:
            print(visited)
            if not visited[key]:
                size = self.dfs(key, visited)
                if size > largest:
                    largest = size
        return largest
Example #18
    def create_word_indices(self):
        self.n_words = len(
            self.ordered_data
        )  # total number of words in the corpus, combining all documents
        self.vocab = list(set(self.ordered_data))  # set of all unique words.
        self.len_vocab = len(self.vocab)
        word_ind = defaultdict.fromkeys(self.vocab, 0)
        for i in range(self.len_vocab):
            word_ind[self.vocab[i]] = i + 1
        self.word_indices = np.zeros(self.n_words, dtype=int)
        for i in range(self.n_words):
            self.word_indices[i] = word_ind[self.ordered_data[i]]
Example #19
def convert_to_defaultdict(x):
    if isinstance(x, (list, np.ndarray)):
        k = defaultdict(int, zip(range(len(x)), x))
    elif isinstance(x, dict):
        k = defaultdict(int, x)
    elif isinstance(x, set):
        k = defaultdict.fromkeys(x, 1)
        k.default_factory = int
    else:
        raise ValueError("Invalid param type %s for jaccard similarity" %
                         type(x))
    return k
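A hedged usage sketch covering the three input shapes handled above (the values are arbitrary):

print(convert_to_defaultdict([3, 1, 4])[1])      # 1 (index -> value)
print(convert_to_defaultdict({'a': 2})['b'])     # 0 (int() default)
print(convert_to_defaultdict({'x', 'y'})['z'])   # 0 (set members map to 1)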
Example #20
def gen_requests(max_requests, batch_size, f):
    client_key_pub = ecdsa_sig.get_asymm_key(CLIENT_ID - 1, ktype='verify')
    client_key_pem = ecdsa_sig.get_asymm_key(CLIENT_ID - 1, ktype='sign')
    keys_to_seq_tracker = defaultdict.fromkeys(range(N), 0)
    for i in range(max_requests):
        # print("for request: [%s]" % i)
        r = random.randint(1, max_requests)
        r2 = random.randint(max_requests // 2, max_requests)
        # amount = random.randint(0,100)
        amount = 50
        msg = "TRAN%s%s" % (
            str(amount).zfill(4),
            str(r2).zfill(4),
            #.decode('latin1')
        )
        msg = bytes(msg, encoding="utf-8") + client_key_pub.to_string()
        if i == 0:
            print(len(client_key_pub.to_string()), "ECDSA")

        _id = r % N
        # print("current ID: ", _id)
        current_key = ecdsa_sig.get_asymm_key(_id, ktype='sign')
        # import pdb; pdb.set_trace()
        seq = keys_to_seq_tracker[_id]
        view = 0  # TODO: do we even need view in request from client??
        req = message.add_sig(current_key, _id, seq, view, "REQU", msg, i)
        keys_to_seq_tracker[
            _id] += 1  # Increase seq count since same node might send another request
        req.sig = client_key_pem.to_string()
        msg = req.SerializeToString()
        if i == 0:
            print("one request:", len(msg))
        ################################
        #padding = "0123456789" * 10
        #msg += padding * 1024
        #msg += "x" * (400 - len(msg))
        msg += msg * (batch_size - 1)
        ################################
        temp = message.add_sig(current_key, _id, 0, 0, "REQU", msg, i + offset)
        size = temp.ByteSize()
        if i == 0:
            print("inner message:", len(msg))
            print("message byte size:", size)
        b = struct.pack("!I", size)
        f.write(b + temp.SerializeToString())
        # if i % batch_size == 0:
        s = "Generation Progress: {:.1%}".format(i / max_requests)
        print(s, end='')
        backspace(len(s))
        # time.sleep(0.02)
    # see the counter
    print("keys_to_seq_tracker: ", keys_to_seq_tracker)
Example #21
def iter_params(data, lam):
    thresh = 0.00000001
    alpha = 4.0
    u_init = field_rating_average(data, 'reviewerID') - alpha 
    i_init = field_rating_average(data, 'itemID') - alpha
    user_list = [review['reviewerID'] for review in data]
    item_list = [review['itemID'] for review in data]
    b_u = defaultdict.fromkeys(user_list, u_init)
    b_i = defaultdict.fromkeys(item_list, i_init)
    b_u.default_factory = float
    b_i.default_factory = float
    u_dict, i_dict = build_ui_dicts(data)
    while True:
        #print alpha
        #alpha_old, b_u_old, b_i_old = copy(alpha), deepcopy(b_u), deepcopy(b_i)
        alpha_old = copy(alpha)
        alpha = update_alpha(data, alpha, b_u, b_i)
        b_u = update_bu(u_dict, alpha, b_u, b_i, lam)
        b_i = update_bi(i_dict, alpha, b_u, b_i, lam)
        if abs(alpha_old - alpha) < thresh:
            break
    return alpha, b_u, b_i, user_list, item_list
Example #22
def mnb_ngram_weights(real_strings, nonsense_strings, n, smoothing=1):
    # This is almost exactly the algorithm given in Figure 13.2 of the book
    # "Introduction to information retrieval" by Manning, C. D., Raghavan,
    # P., & Schütze, H. (2009, Online edition ed., Cambridge University
    # Press).  The differences are:
    #
    # 1) This ignores the probability of the priors, P(c).  In our training
    #    process, we have almost exactly balanced sets of real and nonsense
    #    strings, which means the value of (count of docs in class)/(count
    #    of docs in training set) is equal for both classes, and thus does not
    #    change the results of the maximum a posteriori analysis later.
    #
    # 2) The normal approach to Naive Bayes (and what Manning et al. do)
    #    would be to store the score for an n-gram rather than the logarithm
    #    of the score.  This code actually stores the logarithm of the
    #    probabilities, because that's the quantity used when applying the
    #    Bayes formula and it's more efficient to precompute the log than to
    #    have to compute it over and over again in mnb_score_function()

    occurrences_real     = defaultdict(int)
    occurrences_nonsense = defaultdict(int)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_real[ngram] += 1
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram] += 1

    real_sum = sum(occurrences_real.values())
    nonsense_sum = sum(occurrences_nonsense.values())

    all_ngrams = all_possible_ngrams(n)
    num_ngrams = len(all_ngrams)

    # Initialize all n-gram values to the value that would come from zero
    # occurrences of an n-gram in a given training set.
    missing_real = log(relative_frequency(0, real_sum, num_ngrams, smoothing))
    missing_nonsense = log(relative_frequency(0, nonsense_sum, num_ngrams, smoothing))
    weights = defaultdict.fromkeys(all_ngrams,
                                   NGramWeight(log_real=missing_real,
                                               log_nonsense=missing_nonsense,
                                               ts_real=False, ts_nonsense=False))
    for ngram in all_ngrams:
        real = log(relative_frequency(occurrences_real[ngram], real_sum,
                                      num_ngrams, smoothing))
        nonsense = log(relative_frequency(occurrences_nonsense[ngram],
                                          nonsense_sum, num_ngrams, smoothing))
        weights[ngram] = NGramWeight(log_real=real, log_nonsense=nonsense,
                                     ts_real=bool(occurrences_real[ngram]),
                                     ts_nonsense=bool(occurrences_nonsense[ngram]))
    return weights
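The comments above reference mnb_score_function(), which is not shown in this excerpt. A minimal hedged sketch of how such a scorer might consume the precomputed log weights, assuming the same ngrams() helper used in training and input strings confined to the alphabet covered by all_possible_ngrams():

def mnb_score(string, weights, n):
    # Naive Bayes MAP decision: sum the stored log-probabilities over the
    # string's n-grams and pick the class with the larger total.
    grams = list(ngrams(string.lower(), n))
    log_real = sum(weights[g].log_real for g in grams)
    log_nonsense = sum(weights[g].log_nonsense for g in grams)
    return 'real' if log_real >= log_nonsense else 'nonsense'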
Example #23
def rtn_possible_wp(pdf_pt):
    '''
       returns possible word pairs that the model deeemed as a taxonomy name;
       and the filtered text list with part of speech tagging
    '''
    possible_list = []
    for wp in filter_text_pos(pdf_pt):
        res = model.predict([wp])
        print(wp, res)
        if res > 0.4:
            possible_list.append(wp)
    if possible_list:
        return list(defaultdict.fromkeys(possible_list).keys())
    return ["Sorry, failed to find taxonomy-like names in the document"]
Example #24
def rtn_possible_wp(vc, pdf_pt):
    '''
       returns possible word pairs that the classifier deemed to be taxonomy names;
       it is not that smart; requires a VoteClassifer object, and the filtered
       text list with part of speech tagging
    '''
    possible_list = []
    for wp in filter_text_pos(pdf_pt):
        res = vc.classify(wp)
        if res:
            possible_list.append(wp)
    if possible_list:
        return list(defaultdict.fromkeys(possible_list).keys())
    return ["Sorry, failed to find taxonomy-like names in the document"]
Example #25
def filter_top_electeurs(votes, SEUIL):
    from collections import Counter

    # Build one list per voter; defaultdict.fromkeys(keys, []) would make
    # every key share a single list object.
    electeurs_d = {v["electeur"]: [] for v in votes}
    for vote in votes:
        electeurs_d[vote["electeur"]].append(vote)

    top_users = []
    f = Counter([data["electeur"] for data in votes])

    for n, cpt in f.items():
        if cpt < SEUIL:
            del electeurs_d[n]
    #for k, v in electeurs_d.items():
    #    top_users.append(v)
    print(len(electeurs_d), "electeurs uniques ayant voté au moins %i fois" % SEUIL)
    return electeurs_d
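The per-key list construction above sidesteps a classic pitfall: fromkeys with a mutable default binds every key to the same object. A two-line demonstration:

shared = dict.fromkeys(['a', 'b'], [])
shared['a'].append(1)
print(shared)  # {'a': [1], 'b': [1]} -- both keys see the one list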
Example #26
def validate_agent_per_data_game(agent: IAgent, dg: DataGame, min_tricks: int=0)\
        -> Tuple[np.ndarray, np.ndarray]:
    """
    Validate an agent by comparing its performance to data.
    :param agent: IAgent to check vs the data
    :param dg: DataGame object
    :param min_tricks: minimum tricks index to start validation from
    :return: tuple of 2 arrays for experiences and succeeds. each element in
        each array represents the number of played tricks (== 13 - (#card in hand))
    """
    all_hands, all_tricks, chosen_cards = dg.all_relevant_snapshots()
    tricks_num = len(all_hands) // 2
    checks, succeeds = np.zeros(12), np.zeros(12)

    for pos_idx, position in enumerate(dg.winner):
        for trick_idx in range(min_tricks, tricks_num):
            curr_hands = all_hands[pos_idx * tricks_num + trick_idx]
            curr_trick = all_tricks[pos_idx * tricks_num + trick_idx]
            chosen_card = chosen_cards[pos_idx * tricks_num + trick_idx]
            # Create teams, such that first team is the winner
            teams = [
                Team(curr_hands[0], curr_hands[2]),
                Team(curr_hands[1], curr_hands[3])
            ]
            if curr_hands[0].position not in dg.winner:
                teams[0], teams[1] = teams[1], teams[0]

            curr_state = State(trick=curr_trick,
                               teams=teams,
                               players=curr_hands,
                               prev_tricks=dg.tricks[:trick_idx],
                               score=defaultdict.fromkeys(teams, 0),
                               curr_player=curr_hands[position.value - 1])

            sg = SimulatedGame(
                agent=agent,
                other_agent=SimpleAgent('soft_long_greedy_action'),
                verbose_mode=False,
                state=curr_state)

            validation = 'simple' if isinstance(agent, SimpleAgent) else ''
            played_card = sg.play_single_move(validation=validation)
            if played_card == chosen_card:
                succeeds[trick_idx] += 1
            checks[trick_idx] += 1

    return checks, succeeds
Example #27
    def __init__(self, filename, image_column_name,
                 service_request_column_name, parent_dir):
        """
    Initialize variables and create a dictionary containing categories.

    :param filename: Name of the file that will be used for crawling.
    :param image_column_name: Column name of the field that has URLs of images.
    :param service_request_column_name: Column name of the Service Request Number.
    :param parent_dir: Name of the parent directory where images for each category will be downloaded.
    """
        self.logger = logging.getLogger(__name__)

        self.parent_dir = parent_dir
        self.image_column_name = image_column_name
        self.service_request_column_name = service_request_column_name
        self.xls = pd.ExcelFile(filename)
        self.fifi_dict = defaultdict.fromkeys(
            self.xls.sheet_names)  # Excel file sheet names map to categories
Example #28
def compute_pagerank(urls, inlinks, outlinks, b=.85, iters=20):
    """ Return a dictionary mapping each url to its PageRank.
    The formula is R(u) = (1/N)(1-b) + b * (sum_{w in B_u} R(w) / (|F_w|)

    Initialize all scores to 1.0.

    Params:
      urls.......SortedList of urls (names)
      inlinks....SortedDict mapping url to list of in links (backlinks)
      outlinks...SortedDict mapping url to list of outlinks
    Returns:
      A SortedDict mapping url to its final PageRank value (float)

    >>> urls = SortedList(['a', 'b', 'c'])
    >>> inlinks = SortedDict({'a': ['c'], 'b': set(['a']), 'c': set(['a', 'b'])})
    >>> outlinks = SortedDict({'a': ['b', 'c'], 'b': set(['c']), 'c': set(['a'])})
    >>> sorted(compute_pagerank(urls, inlinks, outlinks, b=.5, iters=0).items())
    [('a', 1.0), ('b', 1.0), ('c', 1.0)]
    >>> iter1 = compute_pagerank(urls, inlinks, outlinks, b=.5, iters=1)
    >>> iter1['a']  # doctest:+ELLIPSIS
    0.6666...
    >>> iter1['b']  # doctest:+ELLIPSIS
    0.333...
    """

    pagerank = defaultdict.fromkeys(urls, 1.0)
    # print(pagerank)

    for iter in range(iters):
        for url in urls:
            sum_temp = 0.0
            for cs in inlinks[url]:
                length = len(outlinks[cs])
                sum_temp += pagerank[cs] / length

            pagerank[url] = (1.0 - b) * (1.0 / len(urls)) + (b * sum_temp)

    return pagerank
Example #29
def bnb_ngram_weights(real_strings, nonsense_strings, n):
    # Count the number of strings in which each n-gram occurs.  Note this is
    # not the same as counting all occurrences of the n-gram, which would
    # entail counting cases where an n-gram appears more than once in a string.
    occurrences_real     = defaultdict(set)
    occurrences_nonsense = defaultdict(set)
    for string in real_strings:
        for ngram in ngrams(string.lower(), n):
            # Using a set so that if the n-gram appears more than once in a
            # given string, we only count it once.
            occurrences_real[ngram].add(string)
    for string in nonsense_strings:
        for ngram in ngrams(string.lower(), n):
            occurrences_nonsense[ngram].add(string)

    # Initialize all n-gram values to the value that would come from zero
    # occurrences of an n-gram in a given training set.
    missing_real = 1/(len(real_strings) + 2)
    missing_nonsense = 1/(len(nonsense_strings) + 2)
    all_ngrams = all_possible_ngrams(n)
    weights = defaultdict.fromkeys(all_ngrams,
                                   NGramWeight(found_in_ts=False,
                                               log_real=log(missing_real),
                                               log_nonsense=log(missing_nonsense),
                                               log_one_minus_real=log(1 - missing_real),
                                               log_one_minus_nonsense=log(1 - missing_nonsense)))
    num_real_strings = len(real_strings)
    num_nonsense_strings = len(nonsense_strings)
    for ngram in all_ngrams:
        num_occurrences_real = len(occurrences_real[ngram])
        num_occurrences_nonsense = len(occurrences_nonsense[ngram])
        real = (num_occurrences_real + 1)/(num_real_strings + 2)
        nonsense = (num_occurrences_nonsense + 1)/(num_nonsense_strings + 2)
        found = (num_occurrences_real + num_occurrences_nonsense) > 0
        weights[ngram] = NGramWeight(found_in_ts=found,
                                     log_real=log(real),
                                     log_nonsense=log(nonsense),
                                     log_one_minus_real=log(1 - real),
                                     log_one_minus_nonsense=log(1 - nonsense))
    return weights
Example #30
def choose_attribute(attributes, examples, labels, attribute_subset):
    # compute total
    ca_prob_dict = {}

    attributes_filtered = [attribute for attribute in attributes if random.random() < attribute_subset]
    #print attributes_filtered

    for attribute in attributes_filtered:
        ca_prob_dict[attribute] = (0, 0)

    ca_total = len(examples.keys())
    ca_total1 = 0
    ca_total2 = 0

    # compute number of 1s and 2s in sample
    for example_key in examples.keys():
        if labels[example_key - 1] == '1':
            ca_total1 += 1
        elif labels[example_key - 1] == '2':
            ca_total2 += 1

    # compute number of TFs with each word
    for attribute in attributes_filtered:
        for key, value in examples.items():
            if labels[key - 1] == '1' and attribute in value:
                ca_prob_dict[attribute] = (ca_prob_dict[attribute][0] + 1, ca_prob_dict[attribute][1])
            elif labels[key - 1] == '2' and attribute in value:
                ca_prob_dict[attribute] = (ca_prob_dict[attribute][0], ca_prob_dict[attribute][1] + 1)

    # compute information gain
    ca_ig_dict = defaultdict.fromkeys(attributes_filtered)

    for key, value in ca_prob_dict.items():
        ca_ig_dict[key] = ig(value[0], value[1], ca_total1 - value[0],
                             ca_total2 - value[1], ca_total)

    # print(value[0], value[1], ca_total1 - value[0], ca_total2 - value[1])
    gnf_max_item = max(ca_ig_dict.items(), key=operator.itemgetter(1))
    # print(words[gnf_max_item[0] - 1] , gnf_max_item)
    return gnf_max_item[0], ca_prob_dict[gnf_max_item[0]]
Example #31
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)
    publish_form = forms.PublishForm(
        request.POST if 'publish-app' in request.POST else None, addon=addon)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden(
                    'This app must obtain content ratings before being '
                    'resubmitted.')

            appeal_form.save()
            create_comm_note(addon, addon.latest_version,
                             request.user, appeal_form.data['notes'],
                             note_type=comm.RESUBMISSION)
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data['upload']
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((mkt.DEVICE_GAIA,)),
                           set((mkt.DEVICE_MOBILE,)),
                           set((mkt.DEVICE_GAIA, mkt.DEVICE_MOBILE,)))
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

        elif 'publish-app' in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url('versions'))

    ctx = {
        'addon': addon,
        'appeal_form': appeal_form,
        'is_tarako': addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        'tarako_review': addon.additionalreview_set
                              .latest_for_queue(QUEUE_TARAKO),
        'publish_form': publish_form,
        'QUEUE_TARAKO': QUEUE_TARAKO,
        'upload_form': upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == mkt.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=mkt.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(
            addon=addon, status=mkt.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return render(request, 'developers/apps/status.html', ctx)
Example #32
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None,
                                       is_packaged=True,
                                       addon=addon,
                                       request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            create_comm_note(addon,
                             addon.current_version,
                             request.amo_user,
                             form.data['notes'],
                             note_type=comm.RESUBMISSION)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version
                           and addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = [
                'has_%s' % feature.lower()
                for feature in validation_result['feature_profile']
            ]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA, )), set(
                (amo.DEVICE_MOBILE, )),
                           set((
                               amo.DEVICE_GAIA,
                               amo.DEVICE_MOBILE,
                           )))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s' %
                     (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {
        'addon': addon,
        'webapp': webapp,
        'form': form,
        'upload_form': upload_form
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects.filter(
                addon=addon,
                activity_log__action=amo.LOG.REJECT_VERSION.id).order_by(
                    '-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(addon=addon,
                                                   status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return jingo.render(request, 'developers/apps/status.html', ctx)
Example #33
crap2.incrap2()
import testdata.crap2
tc2c2 = testdata.crap2.crap2()

#int/double crap
def to_ints(l):
    return [int(x) for x in l]

print(to_ints([4.0, 4.0, 61]), to_ints((4.0, 4.0, 61)))
print(int(min(4.0, 4.0, 2)))
print(int(max(4.0, 4.0, 6)))
print(int(min(4.0, 4.0, 4.0, 2)))
print(int(max(4.0, 4.0, 4, 0, 6)))
l = [6]
l.append(1.0)
print(to_ints(l))

#assorted fixes
[1] != []

from collections import defaultdict
print(sorted(defaultdict.fromkeys(range(7,10), 'a').items()))
import collections
print(sorted(collections.defaultdict.fromkeys(range(7,10), 'a').items()))

from string import *
class string: pass
string.x = 4


Example #34
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {'addon': addon, 'webapp': webapp, 'form': form,
           'upload_form': upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    return jingo.render(request, 'developers/apps/status.html', ctx)
Example #35
                "slug": article9["article_link"].split("/")[-1],
                "title": article9["article_link"].split("/")[-1].replace("-", " "),
                "text": article9['body'],
                "author": article9["author"],
                "votes":[], 
                "arguments":article9["arguments"], 
                "votes_arguments": [],
                "sources":article9["sources"],
                "votes_sources": [],
               }
## The alternative versions

versions = article9["versions"]
versions_c = []
#
# fromkeys with a mutable default ([] or {}) would bind one shared object
# to every key, so build a fresh list/dict per author and per slug.
auteurs_d = {v["author"]: [] for v in versions}

versions_d = {v["slug"]: {} for v in versions}
for v in versions:
    vn = {"date": v["created_at"],
          "link": v["link"],
          "slug": v["slug"],
          "title": v["title"],
          "text": v['comment'],
          "author": v["author"],
          "votes": [],
          "arguments": [],
          "votes_arguments": [],
          "sources": [],
          "votes_sources": []}
    auteurs_d[v["author"]].append(vn)
Example #36
import itertools

#load the files
training_file = 'C:/Users/kPasad/Box Sync/ML/Projects/africanSoilPred/data/training.csv'
test_file     = 'C:/Users/kPasad/Box Sync/ML/Projects/africanSoilPred/data/sorted_test.csv'
feat_imp = pk.load(open('C:/Users/kpasad/Box Sync/ML/Projects/africanSoilPred/data/feat_imp.pk','rb'))

df_train = pd.read_csv(training_file,tupleize_cols =True)
df_test = pd.read_csv(test_file)
train_dims = df_train.shape
algos = ['bayesianRidge','adaBoost','decisionTree','gradBoost','extraTree','linear']
targets = ['Ca','P','pH','SOC','Sand']


#All static data structures here
clf  = defaultdict.fromkeys(algos)
top_preds=defaultdict.fromkeys(targets)
master_obj = master()


#All parameters go here.
derivative_filt = 'disable'
feat_list = 'all'
cv_factor = 0.7
num_cv_folds = 20
learner_id = 0


#Feature massage
train_cols_to_remove = ['PIDN']+targets
x_train=df_train.drop(train_cols_to_remove,axis=1) #Remove the training sample ID and the targets.
Example #37
def matchingStrings(strings, queries):
    # Pre-seed every query with a count of 0, then tally occurrences.
    op = defaultdict.fromkeys(queries, 0)
    for s in strings:
        if s in queries:
            op[s] += 1
    return op
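A usage sketch for the helper above; note the result is a defaultdict whose default_factory is None, since fromkeys does not set one:

print(matchingStrings(['aba', 'baba', 'aba', 'xzxb'], ['aba', 'xzxb', 'ab']))
# defaultdict(None, {'aba': 2, 'xzxb': 1, 'ab': 0})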
Example #38
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True, addon=addon, request=request)

    if request.method == "POST":
        if "resubmit-app" in request.POST and form.is_valid():
            form.save()
            perms = ("reviewer", "senior_reviewer", "staff")
            create_comm_thread(
                action="resubmit",
                addon=addon,
                comments=form.data["notes"],
                profile=request.amo_user,
                version=addon.current_version,
                perms=perms,
            )

            messages.success(request, _("App successfully resubmitted."))
            return redirect(addon.get_dev_url("versions"))

        elif "upload-version" in request.POST and upload_form.is_valid():
            mobile_only = addon.latest_version and addon.latest_version.features.has_qhd

            ver = Version.from_upload(upload_form.cleaned_data["upload"], addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ["has_%s" % feature.lower() for feature in validation_result["feature_profile"]]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (
                set((amo.DEVICE_GAIA,)),
                set((amo.DEVICE_MOBILE,)),
                set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE)),
            )
            if set(addon.device_types) in qhd_devices or mobile_only:
                data["has_qhd"] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _("New version successfully added."))
            log.info(
                "[Webapp:%s] New version created id=%s from upload: %s"
                % (addon, ver.pk, upload_form.cleaned_data["upload"])
            )
            return redirect(addon.get_dev_url("versions.edit", args=[ver.pk]))

    ctx = {"addon": addon, "webapp": webapp, "form": form, "upload_form": upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values("id", "version")
        version_strings = dict((v["id"], v) for v in versions)
        version_strings["num"] = len(versions)
        ctx["version_strings"] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (
                AppLog.objects.filter(addon=addon, activity_log__action=amo.LOG.REJECT_VERSION.id).order_by("-created")
            )[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx["rejection"] = entry and entry.activity_log

    return jingo.render(request, "developers/apps/status.html", ctx)
Example #39
"""
Gender encoding.
"""

from collections import defaultdict

UNKNOWN = 0
MALE = 1
FEMALE = 2

MALE_LABELS = ["MALE", "Male", "male", "M", "m", "1"]
FEMALE_LABELS = ["FEMALE", "Female", "female", "F", "f", "2"]

MAP = defaultdict.fromkeys(MALE_LABELS, MALE)
MAP.update(dict.fromkeys(FEMALE_LABELS, FEMALE))
# Anything that's not labeled male or female falls through to the
# default_factory: int() returns 0, which is the UNKNOWN code.
MAP.default_factory = int
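
A usage sketch of the mapping, using only the module's own constants:

print(MAP["Male"])    # 1 (MALE)
print(MAP["f"])       # 2 (FEMALE)
print(MAP["other"])   # 0 (UNKNOWN): the missing key falls through to int()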
Example #40
0
def ultimate_evaluate(model, extractor_name):
    genres = ['action', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        #        print "Looking for pickle file: data/{0}{1}.p".format(genre, str(num_of_videos)),
        try:
            genreFeatures = load_pkl("test/" + genre + "_test_" +
                                     extractor_name)
            genreFeatures = np.array([np.array(f)
                                      for f in genreFeatures])  # numpy hack
        except Exception as e:
            print(e)
            return
        print("OK.")
        for videoFeatures in genreFeatures:
            """to get all frames from a video -- hacky"""
            total[genreIndex] += 1
            d = defaultdict(int)
            predictedClasses = model.predict_classes(
                videoFeatures)  #List of predictions, per-frame
            print(predictedClasses)
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.items(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1

    print(correct, total)

    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print(confusionMatrix)
    total_acc = 0
    for i in range(len(genres)):
        tp = confusionMatrix[i][i]
        # False positives: other genres predicted as genre i (column sum off the diagonal).
        fp = 0
        for j in range(len(genres)):
            if i != j:
                fp += confusionMatrix[j][i]
        # False negatives: genre i predicted as other genres (row sum off the diagonal).
        fn = 0
        for j in range(len(genres)):
            if i != j:
                fn += confusionMatrix[i][j]
        # True negatives: every cell outside both row i and column i.
        tn = 0
        for j in range(len(genres)):
            for k in range(len(genres)):
                if i != j and i != k:
                    tn += confusionMatrix[j][k]
        prec = tp / (tp + fp) * 100
        rec = tp / (tp + fn) * 100
        f1 = 2 * prec * rec / (prec + rec)  # F1 is the harmonic mean of precision and recall
        acc = (tp + tn) / (tp + fp + fn + tn) * 100
        print("Precision of " + genres[i] + " is " + str(round(prec, 2)) +
              "%\n")
        print("Recall of " + genres[i] + " is " + str(round(rec, 2)) + "%\n")
        print("F1 of " + genres[i] + " is " + str(round(f1, 2)) + "%\n")
        print("Accuracy of " + genres[i] + " is " + str(round(acc, 2)) + "%\n")
        print("---------------")
        total_acc += acc
    total_acc /= len(genres)
    print("Overall Accuracy is " + str(round(total_acc, 2)) + "%\n")
Example #41
0
data = movie.map(lambda x: (int(x[0]), int(x[1]))).groupByKey().sortByKey()\
    .map(lambda x: (int(x[0]), list(x[1])))\
    .filter(lambda x: len(x[1]) >= 9)

user_movies = data.collectAsMap()


nodes = movie.map(lambda x: (int(x[0]), 1)).distinct().sortByKey().map(lambda x: x[0]).collect()
print "time taken nodes is ", time.time() - START_TIME
edges = createGraph(nodes, user_movies)
#connected_users = sc.parallelize(edges).groupByKey().sortByKey()\
#                            .map(lambda x: (int(x[0]), list(x[1]))).collectAsMap()

graph = nx.Graph()
graph.add_edges_from(edges)

# Seed every edge's betweenness score at 0.0 before the BFS accumulation.
bet_dic = defaultdict.fromkeys(graph.edges(), 0.0)

new_bet_dic = BFS(bet_dic, list(graph.nodes()))
print "Time taken to finish btw", time.time() - START_TIME


# round(v / 0.2, 2) / 10 is effectively v / 2, rounded to two decimals.
new_bet_dic.update((k, float(round(v / 0.2, 2) / 10)) for k, v in new_bet_dic.items())

max_bet = sorted(new_bet_dic, key=new_bet_dic.get, reverse=True)
print "Time taken to reach updated btw", time.time() - START_TIME
grps = list()
m = graph.number_of_edges()  # value of m
output = open("Shyamala_Sundararajan_Community.txt", 'w')

degree = dict()
for i in graph.nodes():
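
For reference, networkx can compute the same per-edge scores that bet_dic accumulates; a minimal sketch with an invented toy graph:

import networkx as nx

g = nx.Graph()
g.add_edges_from([(1, 2), (2, 3), (1, 3), (3, 4)])
# Maps each edge to its betweenness centrality score.
print(nx.edge_betweenness_centrality(g, normalized=False))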
Example #42
0
def status(request, addon_id, addon, webapp=False):
    form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True,
                                       addon=addon, request=request)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and form.is_valid():
            form.save()
            perms = ('reviewer', 'senior_reviewer', 'staff')
            create_comm_thread(action='resubmit', addon=addon,
                comments=form.data['notes'], profile=request.amo_user,
                version=addon.current_version, perms=perms)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            mobile_only = (addon.latest_version and
                           addon.latest_version.features.has_qhd)

            ver = Version.from_upload(upload_form.cleaned_data['upload'],
                                      addon, [amo.PLATFORM_ALL])

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Set all detected features as True and save them.
            keys = ['has_%s' % feature.lower()
                    for feature in validation_result['feature_profile']]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE,)))
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s'
                     % (addon, ver.pk, upload_form.cleaned_data['upload']))
            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

    ctx = {'addon': addon, 'webapp': webapp, 'form': form,
           'upload_form': upload_form}

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects
                     .filter(addon=addon,
                             activity_log__action=amo.LOG.REJECT_VERSION.id)
                     .order_by('-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(
            addon=addon, status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return jingo.render(request, 'developers/apps/status.html', ctx)
Example #43
0
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None,
                                       is_packaged=True,
                                       addon=addon,
                                       request=request)
    publish_form = forms.PublishForm(
        request.POST if 'publish-app' in request.POST else None, addon=addon)

    if request.method == 'POST':
        if 'resubmit-app' in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden(
                    'This app must obtain content ratings before being '
                    'resubmitted.')

            appeal_form.save()
            create_comm_note(addon,
                             addon.latest_version,
                             request.user,
                             appeal_form.data['notes'],
                             note_type=comm.RESUBMISSION)
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _('App successfully resubmitted.'))
            return redirect(addon.get_dev_url('versions'))

        elif 'upload-version' in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data['upload']
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = [
                'has_%s' % feature.lower()
                for feature in validation_result['feature_profile']
            ]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (set((amo.DEVICE_GAIA,)),
                           set((amo.DEVICE_MOBILE,)),
                           set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE)))
            mobile_only = (addon.latest_version
                           and addon.latest_version.features.has_qhd)
            if set(addon.device_types) in qhd_devices or mobile_only:
                data['has_qhd'] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _('New version successfully added.'))
            log.info('[Webapp:%s] New version created id=%s from upload: %s' %
                     (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url('versions.edit', args=[ver.pk]))

        elif 'publish-app' in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url('versions'))

    ctx = {
        'addon': addon,
        'appeal_form': appeal_form,
        'is_tarako': addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        'tarako_review': addon.additionalreview_set.latest_for_queue(QUEUE_TARAKO),
        'publish_form': publish_form,
        'QUEUE_TARAKO': QUEUE_TARAKO,
        'upload_form': upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values('id', 'version')
        version_strings = dict((v['id'], v) for v in versions)
        version_strings['num'] = len(versions)
        ctx['version_strings'] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (AppLog.objects.filter(
                addon=addon,
                activity_log__action=amo.LOG.REJECT_VERSION.id).order_by(
                    '-created'))[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx['rejection'] = entry and entry.activity_log

    if waffle.switch_is_active('preload-apps'):
        test_plan = PreloadTestPlan.objects.filter(addon=addon,
                                                   status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if (test_plan.last_submission <
                    settings.PREINSTALL_TEST_PLAN_LATEST):
                ctx['outdated_test_plan'] = True
            ctx['next_step_suffix'] = 'submit'
        else:
            ctx['next_step_suffix'] = 'home'
        ctx['test_plan'] = test_plan

    return render(request, 'developers/apps/status.html', ctx)
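
The device-type check in this view compares the app's device set against three exact sets. A tiny illustration of that membership test (the string constants are hypothetical stand-ins for amo's real device objects):

DEVICE_GAIA, DEVICE_MOBILE, DEVICE_TABLET = 'gaia', 'mobile', 'tablet'  # stand-ins

qhd_devices = ({DEVICE_GAIA}, {DEVICE_MOBILE}, {DEVICE_GAIA, DEVICE_MOBILE})
print({DEVICE_GAIA} in qhd_devices)                 # True: exact-set match
print({DEVICE_GAIA, DEVICE_TABLET} in qhd_devices)  # False: tablets disqualify the flag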
Example #44
0
x_train["Depth"] = x_train["Depth"].apply(lambda depth: 0 if depth == "Subsoil" else 1)
x_train[spectra_features] = fltSpectra

n_train = df_train.shape[0]
cv_factor = 0.7
num_cv_folds = 20
train_sample_idx = range(0, n_train)
num_cv_train_samples = int(df_train.shape[0] * cv_factor)
train_sample_idx = deque(range(0, n_train))

# pca = RandomizedPCA(n_components=400)

feat_imp = np.zeros([num_cv_folds, len(train_feature_list)])

algos = ["bayesianRidge" ",adaBoost", "decisionTree", "gradBoost", "extraTree", "linear", "ridge", "svr", "randForest"]
clf = defaultdict.fromkeys(algos)
clf["adaBoost"] = ensemble.AdaBoostRegressor()
clf["decisionTree"] = DecisionTreeRegressor(random_state=0)
clf["gradBoost"] = ensemble.GradientBoostingRegressor(loss="huber", max_depth=2, n_estimators=500)
clf["extraTree"] = ensemble.ExtraTreesRegressor(n_estimators=20)
clf["linear"] = linear.LinearRegression()
clf["bayesianRidge"] = linear.BayesianRidge()
clf["ridge"] = linear.Lasso(alpha=0.1)
clf["svr"] = SVR(C=1000, kernel="poly", degree=5)
clf["randForest"] = ensemble.RandomForestRegressor(n_estimators=10, criterion="mse")

algosToTry = ["svr"]
feat_imp = pk.load(open("feature_imp_ca1.pk", "r"))
mean_imp = mean(feat_imp, axis=0)
sortIdx = np.argsort(mean_imp)
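
One pitfall with the one-argument fromkeys used above: every key is seeded with None and default_factory stays None, so the registry only guards lookups, not assignments. A quick illustration:

from collections import defaultdict

clf = defaultdict.fromkeys(["svr", "ridge"])
print(clf["svr"])  # None: seeded by fromkeys but never assigned a model
# clf["typo"] would raise KeyError, since fromkeys leaves default_factory as None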
Example #45
0
def status(request, addon_id, addon):
    appeal_form = forms.AppAppealForm(request.POST, product=addon)
    upload_form = NewWebappVersionForm(request.POST or None, is_packaged=True, addon=addon, request=request)
    publish_form = forms.PublishForm(request.POST if "publish-app" in request.POST else None, addon=addon)

    if request.method == "POST":
        if "resubmit-app" in request.POST and appeal_form.is_valid():
            if not addon.is_rated():
                # Cannot resubmit without content ratings.
                return http.HttpResponseForbidden("This app must obtain content ratings before being resubmitted.")

            appeal_form.save()
            create_comm_note(
                addon, addon.latest_version, request.user, appeal_form.data["notes"], note_type=comm.RESUBMISSION
            )
            if addon.vip_app:
                handle_vip(addon, addon.latest_version, request.user)

            messages.success(request, _("App successfully resubmitted."))
            return redirect(addon.get_dev_url("versions"))

        elif "upload-version" in request.POST and upload_form.is_valid():
            upload = upload_form.cleaned_data["upload"]
            ver = Version.from_upload(upload, addon)

            # Update addon status now that the new version was saved.
            addon.update_status()

            res = run_validator(ver.all_files[0].file_path)
            validation_result = json.loads(res)

            # Escalate the version if it uses prerelease permissions.
            escalate_prerelease_permissions(addon, validation_result, ver)

            # Set all detected features as True and save them.
            keys = ["has_%s" % feature.lower() for feature in validation_result["feature_profile"]]
            data = defaultdict.fromkeys(keys, True)

            # Set "Smartphone-Sized Displays" if it's a mobile-only app.
            qhd_devices = (
                set((amo.DEVICE_GAIA,)),
                set((amo.DEVICE_MOBILE,)),
                set((amo.DEVICE_GAIA, amo.DEVICE_MOBILE)),
            )
            mobile_only = addon.latest_version and addon.latest_version.features.has_qhd
            if set(addon.device_types) in qhd_devices or mobile_only:
                data["has_qhd"] = True

            # Update feature profile for this version.
            ver.features.update(**data)

            messages.success(request, _("New version successfully added."))
            log.info("[Webapp:%s] New version created id=%s from upload: %s" % (addon, ver.pk, upload))

            if addon.vip_app:
                handle_vip(addon, ver, request.user)

            return redirect(addon.get_dev_url("versions.edit", args=[ver.pk]))

        elif "publish-app" in request.POST and publish_form.is_valid():
            publish_form.save()
            return redirect(addon.get_dev_url("versions"))

    ctx = {
        "addon": addon,
        "appeal_form": appeal_form,
        "is_tarako": addon.tags.filter(tag_text=QUEUE_TARAKO).exists(),
        "tarako_review": addon.additionalreview_set.latest_for_queue(QUEUE_TARAKO),
        "publish_form": publish_form,
        "QUEUE_TARAKO": QUEUE_TARAKO,
        "upload_form": upload_form,
    }

    # Used in the delete version modal.
    if addon.is_packaged:
        versions = addon.versions.values("id", "version")
        version_strings = dict((v["id"], v) for v in versions)
        version_strings["num"] = len(versions)
        ctx["version_strings"] = json.dumps(version_strings)

    if addon.status == amo.STATUS_REJECTED:
        try:
            entry = (
                AppLog.objects.filter(addon=addon, activity_log__action=amo.LOG.REJECT_VERSION.id).order_by("-created")
            )[0]
        except IndexError:
            entry = None
        # This contains the rejection reason and timestamp.
        ctx["rejection"] = entry and entry.activity_log

    if waffle.switch_is_active("preload-apps"):
        test_plan = PreloadTestPlan.objects.filter(addon=addon, status=amo.STATUS_PUBLIC)
        if test_plan.exists():
            test_plan = test_plan[0]
            if test_plan.last_submission < settings.PREINSTALL_TEST_PLAN_LATEST:
                ctx["outdated_test_plan"] = True
            ctx["next_step_suffix"] = "submit"
        else:
            ctx["next_step_suffix"] = "home"
        ctx["test_plan"] = test_plan

    return render(request, "developers/apps/status.html", ctx)
Example #46
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from wifi_scan_bssid import get_wifis
from collections import defaultdict

model = joblib.load('../ml_data/model_randomforest.plk')
wifi_df = joblib.load('../ml_data/input_dataframe.plk')

while True:
    wifis = get_wifis()

    wifi_dict = defaultdict.fromkeys(wifi_df.columns, 0)
    for wifi in wifis:
        if wifi['bssid'] in wifi_dict:
            wifi_dict[wifi['bssid']] = int(wifi['signal'][:-1])

    # DataFrame.append was removed in pandas 2.0; concatenate the new row instead.
    wifi_df = pd.concat([wifi_df, pd.DataFrame.from_dict([wifi_dict])], ignore_index=True)
    print('current location : ' + str(model.predict(wifi_df.tail(1))[0]))
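
Since the loop above appends a row on every scan, the frame grows without bound; predicting from a standalone one-row frame avoids that. A minimal sketch under the same assumed column layout (predict_location is a hypothetical helper, not part of the original script):

import pandas as pd

def predict_location(model, columns, wifis):
    # Seed every known BSSID column with 0, then fill in observed signal strengths.
    row = dict.fromkeys(columns, 0)
    for wifi in wifis:
        if wifi['bssid'] in row:
            row[wifi['bssid']] = int(wifi['signal'][:-1])
    return model.predict(pd.DataFrame([row], columns=columns))[0]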