def requerimiento6(paralati, paralongi, paralatf, paralongf, graph, maplonglat, mapid):
    """Locate the stations nearest to two coordinates and route between them.

    Every entry of ``maplonglat`` is measured with ``hv`` against both the
    start and the end coordinate; the key of the closest entry on each side is
    used as that side's station id. The ids are translated to names through
    ``mapid`` and, when the two names differ, a Dijkstra search from the start
    station decides whether a route exists.

    Args:
        paralati: latitude of the user's position (anything ``float()`` accepts).
        paralongi: longitude of the user's position.
        paralatf: latitude of the place of interest.
        paralongf: longitude of the place of interest.
        graph: graph handed to ``djk.Dijkstra``.
        maplonglat: map whose keys are station ids and whose values are the
            coordinates compared through ``hv``.
        mapid: map from station id to station name.

    Returns:
        str | dict: a message string when both coordinates resolve to the same
        station name or when no path exists; otherwise a dict with the keys
        ``"estacioninicial"``, ``"estacionfinal"``, ``"tiempo"`` (the value of
        ``djk.distTo``) and ``"ruta"`` (the value of ``djk.pathTo``).
    """
    coordenadaini = (float(paralati), float(paralongi))
    coordenadafin = (float(paralatf), float(paralongf))

    vertices = m.keySet(maplonglat)
    difeinicial = float("inf")
    idinicial = 0
    difefinal = float("inf")
    idfinal = 0
    # The list returned by keySet is addressed with 1-based positions.
    for i in range(1, m.size(maplonglat) + 1):
        vertice = lt.getElement(vertices, i)
        llavevalor = m.get(maplonglat, vertice)
        coordenada = me.getValue(llavevalor)
        ide = me.getKey(llavevalor)
        diferenciaini = hv(coordenadaini, coordenada)
        diferenciafinal = hv(coordenadafin, coordenada)
        # "<=" (not "<") means that on a tie the entry visited last wins.
        if diferenciaini <= difeinicial:
            difeinicial = diferenciaini
            idinicial = ide
        if diferenciafinal <= difefinal:
            difefinal = diferenciafinal
            idfinal = ide

    nombrefinal = me.getValue(m.get(mapid, idfinal))
    nombreinicial = me.getValue(m.get(mapid, idinicial))

    if nombrefinal == nombreinicial:
        # Nothing to route: return before paying for the shortest-path search.
        return (
            "La estacion más cercana de su ubicación y su lugar de interés es la misma: "
            + nombrefinal)

    # The search is only needed (and only run) when the two stations differ.
    source = djk.Dijkstra(graph, idinicial)
    if djk.hasPathTo(source, idfinal):
        return {
            "estacioninicial": nombreinicial,
            "estacionfinal": nombrefinal,
            "tiempo": djk.distTo(source, idfinal),
            "ruta": djk.pathTo(source, idfinal),
        }

    return ("Estacion cercana a usted: " + nombreinicial
            + ", estación cercana a su sitio de interés:" + nombrefinal + ". "
            " No existe camino entre " + nombreinicial + " y " + nombrefinal)
def candidates_for_task_timeslots(t, ul):
    """
    Select the user events that are candidates for a task.

    An event is kept when its timestamp (index 3) lies inside the task's time
    window ``[t[3], t[3] + t[4] / t[6] * 60]`` and it then passes the spatial
    check below.

    :param t: list, a task deployed into the territory
    :param ul: user movements list of events
    :return: list with the events of ``ul`` that passed both checks, in their
        original order
    """
    window_start = t[3]
    window_length = t[4] / t[6] * 60

    # time
    in_window = [
        event for event in ul
        if window_start <= event[3] <= window_start + window_length
    ]

    # space
    task_position = (t[1], t[2])
    candidates = []
    for event in in_window:
        distance = hv(task_position, (event[1], event[2]), 'm')
        # distance factor of user-i for task-j
        # NOTE(review): this factor is computed but never used, and the test
        # below only rejects negative distances -- confirm whether the check
        # was meant to be on the factor instead.
        dMax = 1 - (distance / t[5])
        if distance >= 0:
            candidates.append(event)
    return candidates
def closestCenterHaversine(p, centers):
    """Return the index of the entry of ``centers`` with the smallest ``hv`` distance to ``p``.

    The first entry wins on ties because only a strictly smaller distance
    replaces the current best. Index 0 is returned when ``centers`` is empty.
    """
    best_index, best_distance = 0, float("+inf")
    for index, center in enumerate(centers):
        candidate = hv(p, center)
        if candidate < best_distance:
            best_index, best_distance = index, candidate
    return best_index
def distance(self, correct_distance=False, to_special_column=True, **kwargs):
    """
    Calculates the distance in meters between each position and the one
    recorded before it, using haversine distance formula on an Activity frame.

    Parameters
    ----------
    correct_distance: bool, optional
        It computes the distance corrected by the altitude. default is False.

    to_special_column: bool, optional
        It converts the distance calculated (`pandas.Series`) to special
        runpandas distance cummulative column
        (`runpandas.types.columns.DistancePerPosition`). Default is True.

    **kwargs: Keyword args to be passed to the `haversine` method. A `unit`
        entry is overridden: the result is always expressed in meters.

    Returns
    -------
    haversine_dist: pandas.Series or runpandas.types.columns.DistancePerPosition
        A Series of floats representing the distance in meters with the same
        index of the accessed activity object. The first row has no preceding
        position and is therefore NaN.
    """
    # Forward the caller's options as documented, but pin the unit so the
    # "meters" contract of this method cannot be changed from outside.
    haversine_kwargs = dict(kwargs, unit=Unit.METERS)

    def _distance_to_previous(row):
        previous = row["point_next"]
        if previous is None:
            return float("nan")
        return hv(row["point"], previous, **haversine_kwargs)

    try:
        self._activity["point"] = self._activity.apply(
            lambda x: (x["lat"], x["lon"]), axis=1
        )
        # Despite its name, "point_next" holds the position of the row above
        # (shift(1) moves values down by one row).
        self._activity["point_next"] = self._activity["point"].shift(1)
        self._activity.loc[self._activity["point_next"].isna(), "point_next"] = None
        haversine_dist = self._activity.apply(_distance_to_previous, axis=1)
    finally:
        # Always remove the scratch columns, even when the computation fails;
        # errors="ignore" keeps the cleanup from masking the original
        # exception when a column was never created.
        self._activity.drop(
            ["point_next", "point"], axis=1, inplace=True, errors="ignore"
        )

    if correct_distance:
        haversine_dist = self.__correct_distance(haversine_dist)

    if to_special_column:
        haversine_dist = columns.DistancePerPosition(haversine_dist)

    return haversine_dist
if(DistanceMethod == "Euclidean"): closest = data.map(mapEuclidean) elif(DistanceMethod == "GreateCircle"): closest = data.map(mapHaversine) else: print("WTF DID YOU SAY? MAN, GreateCircle/Euclidean please") exit(-1) pointStats = closest.reduceByKey(testMethod,5) #pointStats = closest.reduceByKey(testMethod) newPoints = pointStats.map( lambda st: (st[0], st[1][0] / st[1][1])).collect() if(DistanceMethod == "Euclidean"): tempDist = sum(np.sum((kPoints[iK] - p) ** 2) for (iK, p) in newPoints) else: tempDist = sum(hv(kPoints[iK], p) for (iK, p) in newPoints) for (iK, p) in newPoints: kPoints[iK] = p if tempDist < global_min: pointsInfo = pointStats global_min = tempDist it = it + 1 pointsInfo = pointsInfo.collect() dirPath = "./step3.Output/"+dirName if not os.path.exists(dirPath): os.makedirs(dirPath) outputFilePath ="./step3.Output/" + dirName+ "/cluster_centers.csv" result = [] for ele in kPoints:
def main():
    """Build name- and distance-similarity features for pairs of place records.

    Steps:

    1. Read ``TrainingData.txt``, index it by ``Id1`` and save it as
       ``matchCmtPlaces_file.csv``.
    2. Re-read that CSV and, for every row, compute the Levenshtein, Hamming
       and Damerau-Levenshtein distances and the character-set Jaccard
       similarity between the two names (columns 1 and 7), plus the ``hv``
       distance between the two coordinate pairs (columns 4-5 and 10-11).
    3. Write the features together with the ``decision`` value (column 12) to
       ``final_output_file.csv`` and print the resulting frame.

    ``IOError``, ``ValueError`` and ``TypeError`` raised along the way are
    caught and printed; nothing is returned.
    """
    # Python 2 has a dedicated ``unicode`` type; on Python 3 ``str`` already is
    # the text type and the ``unicode`` name does not exist.
    try:
        to_text = unicode  # noqa: F821 - only defined on Python 2
    except NameError:
        to_text = str

    try:
        # creates a dataframe that reads into text file
        cmtTd = plib.read_table('TrainingData.txt')

        # set column labeled Id1 to be the index of dataFrame and converts
        # data frame to a csv file
        cmtTd.set_index('Id1', inplace=True)
        cmtTd.to_csv('matchCmtPlaces_file.csv')

        # convert new csv file into list
        newcmtTd = plib.read_csv('matchCmtPlaces_file.csv')
        matchCmtPlaces_Array = num.array(newcmtTd)
        matchCmtPlaces_Array_List = matchCmtPlaces_Array.tolist()

        # one list per output column, filled row by row
        matched_Names = []
        hammed_distance = []
        damerau_distance = []
        jaccard_sim = []
        distances = []
        decision = []

        # compare the Names i.e (Name1 - item[1] and Name2 - item[7]) with the
        # edit distances, and the two coordinate pairs with hv
        for item in matchCmtPlaces_Array_List:
            match_Names = jf.levenshtein_distance(
                to_text(str(item[1])), to_text(str(item[7])))
            hm_distance = jf.hamming_distance(
                to_text(str(item[1])), to_text(str(item[7])))
            dm_distance = jf.damerau_levenshtein_distance(
                to_text(str(item[1])), to_text(str(item[7])))

            # Jaccard similarity over the sets of characters of each name.
            # The raw values are used on purpose: a non-string name raises
            # TypeError here, exactly as it always has.
            chars1 = set(item[1])
            chars2 = set(item[7])
            intersection_cardinality = len(chars1 & chars2)
            union_cardinality = len(chars1 | chars2)

            place_record_1 = (item[4], item[5])
            place_record_2 = (item[10], item[11])
            distanceBtnPlaces = hv(place_record_1, place_record_2)

            matched_Names.append(match_Names)
            jaccard_sim.append(intersection_cardinality / float(union_cardinality))
            hammed_distance.append(hm_distance)
            damerau_distance.append(dm_distance)
            distances.append(distanceBtnPlaces)
            decision.append(item[12])

        match_output_dataFrame = plib.DataFrame({
            'Levenshtein-distance': matched_Names,
            'damerau_distance': damerau_distance,
            'Jaccard-similarity': jaccard_sim,
            'Hamming-distance': hammed_distance,
            'Haversine-distance': distances,
            'decision': decision,
        })

        match_output_dataFrame.set_index('Levenshtein-distance', inplace=True)
        match_output_dataFrame.to_csv('final_output_file.csv')

        print(match_output_dataFrame)

    except (IOError, ValueError, TypeError) as e:
        print(e)
def haversine_(point1, point2, unit='mi'):
    """Validate both points and return the ``hv`` distance between them.

    :param point1: first point; checked with ``check_input`` before use
    :param point2: second point; checked with ``check_input`` before use
    :param unit: unit argument forwarded positionally to ``hv`` (default ``'mi'``)
    :return: whatever ``hv`` returns for the two points
    """
    for point in (point1, point2):
        check_input(point)
    result = hv(point1, point2, unit)
    return result
def haversine_(point1, point2, unit='mi'):
    """Run ``check_input`` on each point, then delegate to ``hv``.

    ``unit`` (default ``'mi'``) is handed to ``hv`` as its third positional
    argument; the value ``hv`` produces is returned unchanged.
    """
    # Validation happens in argument order: point1 first, then point2.
    for candidate in (point1, point2):
        check_input(candidate)
    return hv(*(point1, point2, unit))
def haversine_(point1, point2, miles=True):
    """Check both points with ``check_input`` and return ``hv``'s result.

    ``miles`` (default ``True``) is forwarded to ``hv`` as its third
    positional argument.
    """
    endpoints = (point1, point2)
    for endpoint in endpoints:
        check_input(endpoint)
    measured = hv(point1, point2, miles)
    return measured