def distance_array(self, separate):
        """Return ``(distance, email)`` pairs measured from ``self.clustroid``.

        Candidates are ``self.working_set`` when it is set, otherwise the four
        ``train_examples`` groups of the tester chained in order.  When
        ``separate`` is true, only emails whose ``train`` attribute is in
        ``self.train`` are kept.  The list is sorted in place when
        ``self.sort_first`` is true.
        """
        train_examples = self.active_unlearner.driver.tester.train_examples
        has_working_set = self.working_set is not None

        if has_working_set:
            candidates = self.working_set
        else:
            candidates = chain(train_examples[0], train_examples[1],
                               train_examples[2], train_examples[3])

        if separate:
            dist_list = [(distance(self.clustroid, email, self.opt), email)
                         for email in candidates
                         if email.train in self.train]
            if has_working_set:
                # A filtered working set is required to contain at least one match.
                assert(len(dist_list) > 0)
        else:
            dist_list = [(distance(self.clustroid, email, self.opt), email)
                         for email in candidates]

        if self.sort_first:
            dist_list.sort()

        return dist_list
    def weighted_initial(self, working_set, mislabeled):
        if mislabeled is None: # Note that mislabeled is sorted in descending order by fabs(.50-email.prob)
            mislabeled = self.get_mislabeled()
        t_e = self.driver.tester.train_examples

        print "Total Cluster Centroids Chosen: ", len(self.mislabeled_chosen)

        possible_centroids = list(mislabeled - self.mislabeled_chosen)

        print len(possible_centroids), " mislabeled emails remaining as possible cluster centroids" 
        if len(possible_centroids) == 0: #No more centers to select
            return NO_CENTROIDS
        else:
            possible_centroids.sort(key=lambda x: fabs(.50-x.prob), reverse=True)

            mislabeled_point = possible_centroids[0] # Choose most potent mislabeled email
            self.mislabeled_chosen.add(mislabeled_point)

            print "Chose the mislabeled point: ", mislabeled_point.tag
            print "Probability: ", mislabeled_point.prob

            init_email = None

            training = chain(t_e[0], t_e[1], t_e[2], t_e[3]) if working_set is None else working_set
            if "frequency" in self.distance_opt:
                min_distance = sys.maxint
                mislabeled_point_frequencies = helpers.get_word_frequencies(mislabeled_point)
                for email in training:
                    current_distance = distance(email, mislabeled_point_frequencies, self.distance_opt)
                    if current_distance < min_distance:
                        init_email = email
                        min_distance = current_distance
            elif self.distance_opt == "intersection":
                min_distance = -1
                for email in training: # select closest email to randomly selected mislabeled test email
                    current_distance = distance(email, mislabeled_point, self.distance_opt)
                    if current_distance > min_distance:
                        init_email = email
                        min_distance = current_distance
            else:
                min_distance = sys.maxint
                for email in training: # select closest email to randomly selected mislabeled test email
                    current_distance = distance(email, mislabeled_point, self.distance_opt)
                    if current_distance < min_distance:
                        init_email = email
                        min_distance = current_distance
            print type(init_email)
            
            if init_email is None:
                print "Training emails remaining: ", training
            else:
                print "-> selected ", init_email.tag, " as cluster centroid with distance of ", min_distance, " from mislabeled point"

            return init_email
Example #3
0
 def testDistance(self):
     """distance() on PLEASANTLY/MEANLY must yield 5 as the first of its three results."""
     first, second = "PLEASANTLY", "MEANLY"
     score, out_first, out_second = distance(first, second)
     # The two extra results are only printed for inspection, not asserted.
     print(out_first)
     print(out_second)
     assert score == 5
Example #4
0
def invite_customer(file_name, invite_dist):
    """Collect the customers whose computed distance is below ``invite_dist``.

    ``file_name`` is read line by line; each non-blank line must be a JSON
    object with ``latitude``, ``longitude``, ``user_id`` and ``name`` keys.
    ``distance(latitude, longitude)`` is called for every record.

    Returns a list of ``[user_id, name, dist]`` entries in file order, or
    None (after printing a message) when the file cannot be read.
    """
    # Customers who qualify for an invitation.
    customer_list = []

    try:
        # The context manager closes the file; no explicit close() is needed.
        with open(file_name, "r") as fp:
            for cust in fp:
                # Blank lines (e.g. trailing newlines) are not records; skip them
                # instead of letting json.loads fail on an empty document.
                if not cust.strip():
                    continue

                cust_json = json.loads(cust)

                dist = distance(float(cust_json['latitude']),
                                float(cust_json['longitude']))

                if dist < invite_dist:
                    customer_list.append(
                        [cust_json['user_id'], cust_json['name'], dist])

    except (OSError, IOError):
        print("unable to read the file {}".format(file_name))
        return None

    return customer_list
Example #5
0
def ByWhat3Words():
    """Render the what3words form and, on a valid POST, the distance between two addresses.

    Both submitted addresses are converted to coordinates through the
    what3words geocoder, ``distance`` is called on the two lat/lng pairs and
    the value, rounded to two decimals, is passed to the template together
    with a route-image URL.  Any failure while handling a valid POST is
    logged with its traceback and shown as a generic error message.
    """
    form = What3WordsForm(request.form)
    if request.method != 'POST' or not form.validate():
        # Initial page load or invalid submission: show the empty form.
        return render_template('ByWhat3Words.html', form=form, dis = "")

    # NOTE(review): API credentials are hard-coded here and below; consider
    # moving them to configuration.
    geocoder = what3words.Geocoder("R4IPMCP6")
    try:
        data1 = geocoder.convert_to_coordinates(form.W3WAddress1.data)
        data2 = geocoder.convert_to_coordinates(form.W3WAddress2.data)

        lat1 = float(data1['coordinates']['lat'])
        lng1 = float(data1['coordinates']['lng'])
        lat2 = float(data2['coordinates']['lat'])
        lng2 = float(data2['coordinates']['lng'])

        dis = str(round(distance(lat1, lng1, lat2, lng2),2))

        appID = "QV50Cg9nKusKIxU0xuxn"
        appCode = "MtWxs2XaYo4z_X8jc1n_9Q"

        # The same "lat1,lng1,lat2,lng2" sequence (URL-encoded commas) is used
        # for both the route (r0) and the markers (m0).
        waypoints = "%2C".join([str(lat1), str(lng1), str(lat2), str(lng2)])
        imageurl = ("https://image.maps.api.here.com/mia/1.6/route?r0=" + waypoints
                    + "&m0=" + waypoints
                    + "&lc0=dc85ff&sc0=000000&lw0=6&w=500&app_id=" + appID
                    + "&app_code=" + appCode)
        return render_template('ByWhat3Words.html', form=form, dis = dis, url = imageurl)
    except Exception:
        # Catch ordinary errors only, so SystemExit/KeyboardInterrupt still propagate.
        traceback.print_exc()
        return render_template('ByWhat3Words.html', form=form, dis = "Error - please try again")
    def mislabeled_initial(self, working_set, mislabeled):
        """Chooses an arbitrary point from the mislabeled emails and returns the training email closest to it."""
        if mislabeled is None:
            mislabeled = self.get_mislabeled()
        t_e = self.driver.tester.train_examples

        print "Total Chosen: ", len(self.mislabeled_chosen)

        try:
            mislabeled_point = choice(list(mislabeled - self.mislabeled_chosen))
            self.mislabeled_chosen.add(mislabeled_point)
        except:
            raise AssertionError(str(mislabeled))

        min_distance = sys.maxint
        init_email = None

        training = chain(t_e[0], t_e[1], t_e[2], t_e[3]) if working_set is None else working_set

        for email in training:
            current_distance = distance(email, mislabeled_point, self.distance_opt)
            if current_distance < min_distance:
                init_email = email
                min_distance = current_distance

        return init_email
Example #7
0
 def testFittingDistance(self):
     """distance() on TAGGCTTA/TAGATA must yield 5 as the first of its three results."""
     first, second = "TAGGCTTA", "TAGATA"
     score, out_first, out_second = distance(first, second)
     # The two extra results are only printed for inspection, not asserted.
     print(out_first)
     print(out_second)
     assert score == 5
Example #8
0
def find_nearest_neighbours(p: np.array,
                            points: np.array,
                            k: int = 5) -> np.array:
    """
    Return the indices of the k entries of points nearest to p.

    distance(p, points[i]) is evaluated for every row of points; the result
    holds the row indices ordered from smallest to largest distance, cut to
    at most k entries (fewer when points has fewer than k rows).
    """
    dists = np.zeros(points.shape[0])
    for row, candidate in enumerate(points):
        dists[row] = distance(p, candidate)
    nearest_first = np.argsort(dists)
    return nearest_first[:k]
Example #9
0
    def distance_array(self):
        """Return ``(distance, email)`` pairs measured from ``self.clustroid``.

        Emails are taken from ``self.working_set`` when it is set, otherwise
        from the four ``train_examples`` groups of the tester chained in order.
        The list is sorted in place when ``self.sort_first`` is true.
        """
        train_examples = self.active_unlearner.driver.tester.train_examples

        if self.working_set is not None:
            emails = self.working_set
        else:
            emails = chain(train_examples[0], train_examples[1],
                           train_examples[2], train_examples[3])

        dist_list = [(distance(self.clustroid, email, self.opt), email)
                     for email in emails]

        if self.sort_first:
            dist_list.sort()

        return dist_list
def DistRank_F(address):
    """Build a per-zip-code ranking table relative to ``address``.

    ``address`` is geocoded with Nominatim.  For every isochrone type and
    time, ``distance(loc, zip, type, time)`` is computed for each zip code,
    passed through ``ranking`` and stored in a column named
    ``"Ranking of <type> <time> of Family"``; the row whose zip code equals
    the address's own zip code is then set to 5.  For each type, the three
    ranking columns are summed row-wise, ranked again and stored as
    ``"<Type> Area of Family"``.

    Returns the resulting DataFrame (one row per zip code).
    """
    zip_code = [
        '20001', '20002', '20003', '20004', '20005', '20006', '20007', '20008',
        '20009', '20010', '20011', '20012', '20015', '20016', '20017', '20018',
        '20019', '20020', '20024', '20032', '20036', '20037', '22201', '22202',
        '22203', '22204', '22205', '22206', '22207', '22209', '22211', '22213',
        '22214', '22301', '22302', '22304', '22305', '22311', '22314'
    ]
    # three types of isochrone
    types = ["driving", "cycling", "walking"]
    # time of isochrone
    times = [10, 30, 60]
    # Column labels must be an ordered sequence: a set is unordered and is
    # rejected by recent pandas releases.
    df = pd.DataFrame(zip_code, columns=["Zip Code"])

    from geopy.geocoders import Nominatim

    # create a random name of agent so that the service will not time out
    agent = "distance" + str(random.randint(0, 100))

    geolocator = Nominatim(user_agent=agent)
    # NOTE(review): geocode() presumably returns None for an unknown address,
    # which would fail on the attribute access below - confirm and handle.
    location = geolocator.geocode(address)
    # loc is the location of address, stored as (longitude, latitude)
    loc = (location.longitude, location.latitude)
    # zi_p is the second-to-last comma-separated field of the resolved address
    zi_p = location.address.split(",")[-2].lstrip()

    for ty_pe in types:
        for iso_time in times:
            name = 'Ranking of ' + str(ty_pe) + " " + str(iso_time) + " " + "of Family"

            df[name] = [distance(loc, code, ty_pe, iso_time) for code in zip_code]
            df[name] = ranking(df[name])
            # The address's own zip code always gets the fixed value 5.
            df.loc[df["Zip Code"] == zi_p, name] = 5

        name_col = str(ty_pe.title()) + " Area of Family"
        # Ranking columns of this type: names containing both the lower-case
        # type word and "Family" (earlier "<Type> Area" columns are title-cased
        # and therefore do not match).
        cols = [m for m in df.columns
                if ty_pe in m.split(" ") and "Family" in m.split(" ")]

        df[name_col] = df[cols].apply(lambda x: x.sum(), axis=1)

        df[name_col] = ranking(df[name_col])

    return df
Example #11
0
    def distance_array(self, separate):
        """Returns a list containing the distances from each email to the center."""
        train_examples = self.active_unlearner.driver.tester.train_examples

        if separate:  # if true, all emails must be same type (spam or ham) as centroid
            if self.working_set is None:
                if "frequency" in self.opt:
                    print "     Creating Distance Array using frequency method"
                    dist_list = [(distance(train, self.cluster_word_frequency,
                                           self.opt), train) for train in
                                 chain(train_examples[0], train_examples[1],
                                       train_examples[2], train_examples[3])
                                 if train.train in self.train]
                else:
                    dist_list = [(distance(self.clustroid, train,
                                           self.opt), train) for train in
                                 chain(train_examples[0], train_examples[1],
                                       train_examples[2], train_examples[3])
                                 if train.train in self.train]
            else:
                if "frequency" in self.opt:
                    print "     Creating Distance Array using frequency method"
                    dist_list = [(distance(train, self.cluster_word_frequency,
                                           self.opt), train)
                                 for train in self.working_set
                                 if train.train in self.train]

                else:
                    dist_list = [(distance(self.clustroid, train,
                                           self.opt), train)
                                 for train in self.working_set
                                 if train.train in self.train]
                    assert (len(dist_list) > 0)

        else:
            if self.working_set is None:
                dist_list = [
                    (distance(self.clustroid, train, self.opt), train)
                    for train in chain(train_examples[0], train_examples[1],
                                       train_examples[2], train_examples[3])
                ]

            else:
                dist_list = [(distance(self.clustroid, train, self.opt), train)
                             for train in self.working_set]

        if self.sort_first:
            dist_list.sort(
            )  # sorts tuples by first element default, the distance

        if self.opt == "intersection":
            dist_list = dist_list[::-1]
            return dist_list  # reverse the distance list so that closest element is at start
        print "\n ----------------Generated Distance Array----------------\n"
        print[email[0] for email in dist_list[:5]]

        return dist_list
def cluster_au(au, gold=False, pos_cluster_opt=0):
    """Clusters the training space of an ActiveUnlearner and returns the list of clusters."""
    print "\n-----------------------------------------------------\n"
    cluster_list = []
    training = au.shuffle_training()
    print "\nResetting mislabeled...\n"
    mislabeled = au.get_mislabeled(update=True)
    au.mislabeled_chosen = set()
    print "\nClustering...\n"
    original_training_size = len(training)
    while len(training) > 0:
        print "\n-----------------------------------------------------\n"
        print "\n" + str(len(training)) + " emails out of " + str(original_training_size) + \
              " still unclustered. TEST1\n"

        current_seed = cluster_methods(au, "mislabeled", training, mislabeled)
        
        pre_cluster_rate = au.current_detection_rate

        # Sort TRAINING w.r.t. seed
        
        sorted_list = [(distance(current_seed, train, "inv-match"), train) for train in training]
        sorted_list.sort(key=operator.itemgetter(0))

        #print "\n\n\nSorted List\n\n\n"
        #print sorted_list
        #print "\n\n\nEND\n\n\n"

        cluster_result = determine_cluster(current_seed, au, working_set=training, gold=gold, impact=True,
                                           pos_cluster_opt=pos_cluster_opt)
        #while cluster_result is None:
            #current_seed = cluster_methods(au, "mislabeled", training, mislabeled)
            #cluster_result = determine_cluster(current_seed, au, working_set=training, gold=gold, impact=True,
            #                                   pos_cluster_opt=pos_cluster_opt)
        net_rate_change, cluster = cluster_result

        post_cluster_rate = au.current_detection_rate

        assert(post_cluster_rate == pre_cluster_rate), str(pre_cluster_rate) + " " + str(post_cluster_rate)

        cluster_list.append([net_rate_change, cluster])
        print "\nRemoving cluster from shuffled training set...\n"
        for email in cluster.cluster_set:
            training.remove(email)

    cluster_list.sort()
    print "\nClustering process done and sorted.\n"
    return cluster_list
    def row_sum_initial(self, working_set, mislabeled):
        """Return the training email with the smallest sum of squared distances to the mislabeled emails.

        ``mislabeled`` defaults to ``self.get_mislabeled()`` when None.
        Candidates come from ``working_set``, or from the four ``train_examples``
        groups when it is None.  Returns None when no candidate qualifies.
        """
        if mislabeled is None:
            mislabeled = self.get_mislabeled()
        t_e = self.driver.tester.train_examples
        best_email, best_total = None, sys.maxint

        if working_set is None:
            training = chain(t_e[0], t_e[1], t_e[2], t_e[3])
        else:
            training = working_set

        for candidate in training:
            total = sum(distance(candidate, other, self.distance_opt) ** 2
                        for other in mislabeled)
            # Strict comparison: the first candidate with the minimal total wins.
            if total < best_total:
                best_email, best_total = candidate, total

        return best_email
Example #14
0
    def distance_array(self, separate):
        """Returns a list containing the distances from each email to the center."""
        train_examples = self.active_unlearner.driver.tester.train_examples

        if separate: # if true, all emails must be same type (spam or ham) as centroid
            if self.working_set is None:
                if "frequency" in self.opt:
                    print "     Creating Distance Array using frequency method"
                    dist_list = [(distance(train, self.cluster_word_frequency, self.opt), train) for train in chain(train_examples[0],
                                                                                                   train_examples[1],
                                                                                                   train_examples[2],
                                                                                                   train_examples[3])
                                                                    if train.train in self.train]
                else: 
                    dist_list = [(distance(self.clustroid, train, self.opt), train) for train in chain(train_examples[0],
                                                                                                       train_examples[1],
                                                                                                       train_examples[2],
                                                                                                       train_examples[3])
                                 if train.train in self.train]
            else:
                if "frequency" in self.opt:
                    print "     Creating Distance Array using frequency method"
                    dist_list = [(distance(train, self.cluster_word_frequency, self.opt), train) for train in self.working_set if
                                 train.train in self.train]
                    
                else:
                    dist_list = [(distance(self.clustroid, train, self.opt), train) for train in self.working_set if
                                 train.train in self.train]
                    assert(len(dist_list) > 0)
                
                
        else:
            if self.working_set is None:
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in chain(train_examples[0],
                                                                                                   train_examples[1],
                                                                                                   train_examples[2],
                                                                                                   train_examples[3])]

            else:
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in self.working_set]

        if self.sort_first:
            dist_list.sort() # sorts tuples by first element default, the distance

        if self.opt == "intersection":
            dist_list = dist_list[::-1]
            return dist_list # reverse the distance list so that closest element is at start
        print "\n ----------------Generated Distance Array----------------\n"
        print [email[0] for email in dist_list[:5]]
Example #15
0
def ByCoordinate():
    """Render the coordinate form and, on a valid POST, the distance between the two points.

    The four submitted values are multiplied by pi/180 before being passed
    to ``distance``; the result, rounded to two decimals, is handed to the
    template together with a route-image URL built from the values exactly
    as submitted.  Any failure while handling a valid POST is logged with its
    traceback and shown as a generic error message.
    """
    form = CoordinateForm(request.form)
    if request.method != 'POST' or not form.validate():
        # Initial page load or invalid submission: show the empty form.
        return render_template('ByCoordinate.html', form=form, dis = "")

    try:
        lat1 = float(form.Latitude1.data) * float(pi) / float(180)
        lon1 = float(form.Longitude1.data) * float(pi) / float(180)
        lat2 = float(form.Latitude2.data) * float(pi) / float(180)
        lon2 = float(form.Longitude2.data) * float(pi) / float(180)

        dis = str(round(distance(lat1, lon1, lat2, lon2),2))

        # NOTE(review): API credentials are hard-coded; consider moving them
        # to configuration.
        appID = "QV50Cg9nKusKIxU0xuxn"
        appCode = "MtWxs2XaYo4z_X8jc1n_9Q"

        # The same "lat1,lon1,lat2,lon2" sequence (URL-encoded commas) is used
        # for both the route (r0) and the markers (m0).
        waypoints = "%2C".join([str(form.Latitude1.data), str(form.Longitude1.data),
                                str(form.Latitude2.data), str(form.Longitude2.data)])
        imageurl = ("https://image.maps.api.here.com/mia/1.6/route?r0=" + waypoints
                    + "&m0=" + waypoints
                    + "&lc0=dc85ff&sc0=000000&lw0=6&w=500&app_id=" + appID
                    + "&app_code=" + appCode)
        return render_template('ByCoordinate.html', form=form, dis = dis, url = imageurl)
    except Exception:
        # Catch ordinary errors only, so SystemExit/KeyboardInterrupt still propagate.
        traceback.print_exc()
        return render_template('ByCoordinate.html', form=form, dis = "Error - please try again")
Example #16
0
 def test_four(self):
     """distance(-1, 1, 1, -1) should equal the square root of 8."""
     res = distance(-1, 1, 1, -1)
     # Compare floats with a tolerance: an exact match against 8**0.5 depends
     # on how distance() happens to round its square root.
     self.assertAlmostEqual(res, 8**0.5)
Example #17
0
 def test_three(self):
     """distance(1, 1, 1, 1) must be 0."""
     outcome = distance(1, 1, 1, 1)
     self.assertEqual(outcome, 0)
Example #18
0
 def test_two(self):
     """distance(0, 0, 1, 1) should equal the square root of 2."""
     res = distance(0, 0, 1, 1)
     # Compare floats with a tolerance: an exact match against 2**0.5 depends
     # on how distance() happens to round its square root.
     self.assertAlmostEqual(res, 2**0.5)
        # this is redundant since current version is 2-D but required since distance and area functions are generic 3-D
        tmp_z_pos = (dim - 2) * rand.uniform(z_part_domain[0],
                                             z_part_domain[1])

        # first particle
        if global_count == 0:
            particles_global[global_count, 2] = tmp_x_pos
            particles_global[global_count, 3] = tmp_y_pos

            global_count = global_count + 1
            count = count + 1

        else:

            # check for overlap with all previous particles
            dist_bol = distance(dim,global_count,particles_global,tmp_x_pos,tmp_y_pos,tmp_z_pos,dist_tol, \
                            x_part_domain_len,y_part_domain_len,z_part_domain_len)

            #print(dist_bol)
            if dist_bol == True:
                particles_global[global_count, 2] = tmp_x_pos
                particles_global[global_count, 3] = tmp_y_pos

                if dim == 3:
                    particles_global[global_count, 4] = tmp_z_pos

                global_count = global_count + 1
                count = count + 1

direction = 1

area_fraction(direction, dim, Dp, particles_global, x_part_domain,
    def distance_array(self, separate):
        """Returns a list containing the distances from each email to the center.

        Each entry is a ``(distance, email)`` tuple.  Emails come from
        ``self.working_set`` when it is set, otherwise from the four
        ``train_examples`` groups chained in order.  When ``separate`` is true
        only emails whose ``train`` attribute is in ``self.train`` are kept.

        When ``separate`` is false, a working set exists and ``self.moving`` is
        true, the working set is first walked once: adjacent entries may be
        swapped in place and ``self.clustroid.clues`` is replaced by the result
        of ``zombify`` at every step.  The distances are then computed against
        the updated clustroid.

        The list is sorted in place when ``self.sort_first`` is true.
        """
        train_examples = self.active_unlearner.driver.tester.train_examples

        if separate:
            if self.working_set is None:
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in chain(train_examples[0],
                                                                                                   train_examples[1],
                                                                                                   train_examples[2],
                                                                                                   train_examples[3])
                             if train.train in self.train]

            else:
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in self.working_set if
                             train.train in self.train]
                assert(len(dist_list) > 0)
        else:
            if self.working_set is None:
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in chain(train_examples[0],
                                                                                                   train_examples[1],
                                                                                                   train_examples[2],
                                                                                                   train_examples[3])]

            else:
                if self.moving:
                    # NOTE(review): the per-step distances used to be appended to a
                    # list that was always overwritten below, so only the side
                    # effects of this pass (swaps and clustroid updates) are kept.
                    for index, _ in enumerate(self.working_set):
                        if index + 1 < len(self.working_set):
                            dist1 = distance(self.clustroid, self.working_set[index], self.opt)
                            dist2 = distance(self.clustroid, self.working_set[index + 1], self.opt)

                            # Swap the two neighbouring queue entries when the
                            # current one has the smaller distance value.
                            if dist1 < dist2:
                                self.working_set[index], self.working_set[index + 1] = \
                                    self.working_set[index + 1], self.working_set[index]
                        else:
                            distance(self.clustroid, self.working_set[index], self.opt)

                        # get common feature space - make zombie
                        commonFeatureSpace = zombify(self.clustroid, self.working_set[index])
                        self.clustroid.clues = commonFeatureSpace

                # Computed for both the moving and the non-moving case; previously
                # the non-moving case left dist_list unassigned and crashed below.
                dist_list = [(distance(self.clustroid, train, self.opt), train) for train in self.working_set]

        if self.sort_first:
            dist_list.sort()

        return dist_list
Example #21
0
 def update_dist_list(self, separate=True):
     """Recompute self.dist_list against self.cluster_word_frequency and re-sort it.

     The emails currently held in self.dist_list (second item of each entry)
     are measured again with distance(); ``separate`` is accepted but not used.
     """
     current_emails = [entry[1] for entry in self.dist_list]
     self.dist_list = [
         (distance(email, self.cluster_word_frequency, self.opt), email)
         for email in current_emails
     ]
     # In-place ascending sort; tuples compare by distance first.
     self.dist_list.sort()
Example #22
0
 def test_zero(self):
     """distance(0, 0, 0, 0) must be 0."""
     outcome = distance(0, 0, 0, 0)
     self.assertEqual(outcome, 0)
Example #23
0
def chosen_sum(chosen, x, opt=None):
    """Return the sum of distance(msg, x, opt) over every msg in chosen (0 when chosen is empty)."""
    return sum(distance(msg, x, opt) for msg in chosen)
Example #24
0
def webhook():
    """Handle a webhook POST and reply to every incoming text message.

    The JSON body of the request is logged, then each messaging event that
    carries a ``message`` is passed through ``wit_response``.  Every
    recognised entity is mapped to its handler function; when several
    entities are recognised, the handler of the last matching entity
    decides the reply.  If no handler produced a reply, a generic
    "didn't understand" text is sent back to the sender.

    Payloads that are not a JSON object, whose ``object`` is not ``'page'``,
    or whose entries carry no ``messaging`` list are ignored instead of
    raising, so the endpoint still acknowledges them.
    (The payload shape looks like the Messenger Platform page webhook;
    confirm against the subscription this endpoint is registered for.)

    Returns:
        The tuple ``("ok", 200)`` in every case.
    """
    message = request.get_json()  # messages from users are fetched
    log(message)
    print(message)

    # Nothing to process unless this is a well-formed page event.
    if not isinstance(message, dict) or message.get('object') != 'page':
        return "ok", 200

    for entry in message.get('entry') or []:
        for messaging_event in entry.get('messaging') or []:
            sender_id = messaging_event['sender']['id']

            incoming = messaging_event.get('message')
            if not incoming:
                # Events without a message payload are not answered.
                continue

            # Messages without text are still sent through the
            # entity extraction, using a placeholder.
            messaging_text = incoming.get('text', 'NoText')

            response = None

            # The third returned value is not needed here.
            entities, values, _ = wit_response(messaging_text)

            # Each recognised entity overwrites the reply of the previous
            # one; unrecognised entities leave it untouched.
            for entity in entities:
                if entity == 'get_class':
                    response = classes(entities, entity, values)
                    if response is None:
                        response = "Sorry! I can not find this course..."

                elif entity == 'feeling':
                    response = feeling(entities, entity, values)

                elif entity == 'mood':
                    response = mood(entities, entity, values)

                elif entity == 'greetings':
                    response = hello(entities, entity, values)

                elif entity == 'bye':
                    response = buy(entities, entity, values)

                elif entity == 'currency_1':
                    response = amount_of_money(entities, entity, values)

                elif entity == 'docfinder':
                    response = docfind(entities, entity, values)

                elif entity == 'mensa_hours':
                    response = mensa_hours(entities, entity, values)

                elif entity == 'useful_inf_for_inc':
                    response = useful_inf_for_inc(entities, entity, values)

                elif entity == 'fh':
                    response = fh(entities, entity, values)

                elif entity == 'before_leaving':
                    response = before_leaving(entities, entity, values)

                elif entity == 'tips':
                    response = tips(entities, entity, values)

                elif entity == 'because_a':
                    response = because_a(entities, entity, values)

                elif entity == 'fact':
                    response = facts(entities, entity, values)

                elif entity == 'joke':
                    response = jokes(entities, entity, values)

                elif entity == 'ok':
                    response = ok(entities, entity, values)

                elif entity == 'weather':
                    response = weather(entities, entity, values)

                elif entity == 'NeedHelp':
                    response = needhelp(entities, entity, values)

                elif entity == 'no':
                    response = no(entities, entity, values)

                elif entity == 'Start':
                    response = start(entities, entity, values)

                elif entity == 'Stop':
                    response = stop(entities, entity, values)

                elif entity == 'event':
                    response = events(entities, entity, values)

                elif entity == 'game':
                    response = games(entities, entity, values)

                elif entity == 'math':
                    response = calc(entities, entity, values)

                elif entity == 'love_q':
                    response = love_q(entities, entity, values)

                elif entity == 'hobby':
                    response = hobby(entities, entity, values)

                elif entity == 'remind':
                    response = remind(entities, entity, values)

                elif entity == 'google':
                    response = search_g(entities, entity, values)

                elif entity == 'notable_person':
                    response = person(entities, entity, values)

                elif entity == 'duration':
                    response = duration(entities, entity, values)

                elif entity == 'amount_of_money':
                    response = amount_of_money(entities, entity, values)

                elif entity == 'thanks':
                    response = thanks(entities, entity, values)

                elif entity == 'SayThanks':
                    response = saythanks(entities, entity, values)

                elif entity == 'local_search':
                    response = local_search(entities, entity, values)

                elif entity == 'distance':
                    response = distance(entities, entity, values)

            if response is None:
                response = "Sorry! I didn't understand your message..."

            bot.send_text_message(sender_id, response)

    return "ok", 200
Example #25
0
 def test_one(self):
     """distance(0, 0, 0, 1) must yield 1."""
     self.assertEqual(distance(0, 0, 0, 1), 1)
Example #26
0
    def select_initial(self, option="mislabeled", distance_opt = "extreme"):
        """ Returns an email to be used as the initial unlearning email based on
            the mislabeled data (our tests show that the mislabeled and pollutant
            emails are strongly, ~80%, correlated) if option is true (which is default)."""
        mislabeled = self.get_mislabeled()
        t_e = self.driver.tester.train_examples
        print "Chosen: ", self.mislabeled_chosen
        print "Total Chosen: ", len(self.mislabeled_chosen)
        if option == "rowsum":
            # We want to minimize the distances (rowsum) between the email we select
            # and the mislabeled emails. This ensures that the initial email we select
            # is correlated with the mislabeled emails.

            minrowsum = sys.maxint
            init_email = None
            for email in chain(t_e[0], t_e[1], t_e[2], t_e[3]):
                rowsum = 0
                for email2 in mislabeled:
                    dist = distance(email, email2, distance_opt)
                    rowsum += dist ** 2
                if rowsum < minrowsum:
                    minrowsum = rowsum
                    init_email = email

            return init_email

        if option == "mislabeled":
            # This chooses an arbitrary point from the mislabeled emails and simply finds the email
            # in training that is closest to this point.
            try:
                mislabeled_point = choice(list(mislabeled - self.mislabeled_chosen))
                self.mislabeled_chosen.add(mislabeled_point)
            except:
                raise AssertionError(str(mislabeled))

            min_distance = sys.maxint

            for email in chain(t_e[0], t_e[1], t_e[2], t_e[3]):
                current_distance = distance(email, mislabeled_point, distance_opt)
                if current_distance < min_distance:
                    init_email = email
                    min_distance = current_distance

            return init_email

        if option == "max_sum":
            try:
                max_sum = 0

                for email in chain(t_e[0], t_e[1], t_e[2], t_e[3]):
                    current_sum = chosen_sum(self.training_chosen, email, distance_opt)
                    if current_sum > max_sum:
                        init_email = email
                        max_sum = current_sum

                self.training_chosen.add(init_email)
                return init_email

            except:
                print "Returning initial seed based off of mislabeled...\n"
                return self.select_initial(option="mislabeled")
Example #27
0
    def weighted_initial(self, working_set, mislabeled):
        if mislabeled is None:  # Note that mislabeled is sorted in descending order by fabs(.50-email.prob)
            mislabeled = self.get_mislabeled()
        t_e = self.driver.tester.train_examples

        print "Total Cluster Centroids Chosen: ", len(self.mislabeled_chosen)

        possible_centroids = list(mislabeled - self.mislabeled_chosen)

        print len(
            possible_centroids
        ), " mislabeled emails remaining as possible cluster centroids"
        if len(possible_centroids) == 0:  #No more centers to select
            return NO_CENTROIDS
        else:
            possible_centroids.sort(key=lambda x: fabs(.50 - x.prob),
                                    reverse=True)

            mislabeled_point = possible_centroids[
                0]  # Choose most potent mislabeled email
            self.mislabeled_chosen.add(mislabeled_point)

            print "Chose the mislabeled point: ", mislabeled_point.tag
            print "Probability: ", mislabeled_point.prob

            init_email = None

            training = chain(t_e[0], t_e[1], t_e[2],
                             t_e[3]) if working_set is None else working_set
            if "frequency" in self.distance_opt:
                min_distance = sys.maxint
                mislabeled_point_frequencies = helpers.get_word_frequencies(
                    mislabeled_point)
                for email in training:
                    current_distance = distance(email,
                                                mislabeled_point_frequencies,
                                                self.distance_opt)
                    if current_distance < min_distance:
                        init_email = email
                        min_distance = current_distance
            elif self.distance_opt == "intersection":
                min_distance = -1
                for email in training:  # select closest email to randomly selected mislabeled test email
                    current_distance = distance(email, mislabeled_point,
                                                self.distance_opt)
                    if current_distance > min_distance:
                        init_email = email
                        min_distance = current_distance
            else:
                min_distance = sys.maxint
                for email in training:  # select closest email to randomly selected mislabeled test email
                    current_distance = distance(email, mislabeled_point,
                                                self.distance_opt)
                    if current_distance < min_distance:
                        init_email = email
                        min_distance = current_distance
            print type(init_email)

            if init_email is None:
                print "Training emails remaining: ", training
            else:
                print "-> selected ", init_email.tag, " as cluster centroid with distance of ", min_distance, " from mislabeled point"

            return init_email
def chosen_sum(chosen, x, opt=None):
    """Sum the distances between x and each email in the chosen collection.

    Every distance is computed as distance(msg, x, opt); an empty
    collection gives 0.
    """
    distances = (distance(candidate, x, opt) for candidate in chosen)
    return sum(distances)
Example #29
0
    def test_distance(self):
        """distance(54.180238, -5.920898) truncated to an int must be 96."""
        truncated = int(distance(54.180238, -5.920898))
        self.assertEqual(truncated, 96)
Example #30
0
 def update_dist_list(self, separate=True):
     """Recompute the distances stored in self.dist_list, then sort it.

     self.dist_list holds (distance, email) pairs.  The emails are taken
     from the current list, each one is measured again against
     self.cluster_word_frequency with self.opt, and the resulting pairs
     replace the old list before it is sorted in place.  Used by the
     frequency[1,2] method.

     ``separate`` is part of the signature but is not read here.
     """
     # Collect the emails up front; the list itself is replaced below.
     emails = list(pair[1] for pair in self.dist_list)

     refreshed = []
     for email in emails:
         refreshed.append(
             (distance(email, self.cluster_word_frequency, self.opt), email))

     self.dist_list = refreshed
     self.dist_list.sort()