def main(args):
    """Read a food list from ``args.input`` and print two answers.

    Every line of the form ``<ingredients> (contains <allergens>)`` is parsed:
    ingredients are separated by single spaces, allergens by ``", "``.  Lines
    that do not match that shape are ignored.

    First printed value: the number of ingredient occurrences, counted over
    all parsed foods, whose ingredient is not a candidate for any allergen.
    An ingredient is a candidate for an allergen when it appears in every
    food that lists that allergen.

    Second printed value: the ingredients chosen by a maximum bipartite
    matching between allergens and their candidates, comma-joined in sorted
    key order.

    Args:
        args: object with an ``input`` attribute holding the path to read.
    """
    # Raw string on purpose: '\(' inside a plain literal is an invalid
    # escape sequence.  Compiled once instead of once per line.
    food_re = re.compile(r'(.+)\(contains(.+)\)')

    # allergen -> one ingredient set per food that lists the allergen
    allergens_map = defaultdict(list)
    # ingredient list of every parsed food (duplicates across foods kept)
    all_ingred = []

    with open(args.input, 'r') as fh:
        for line in fh:
            res = food_re.match(line)
            if not res:
                continue
            ingredients = res.group(1).strip().split(' ')
            allergens = res.group(2).strip().split(', ')
            for allergen in allergens:
                allergens_map[allergen].append(set(ingredients))
            all_ingred.append(ingredients)

    # Candidates per allergen, plus the union of all candidates.  The union
    # is only used for membership tests, so a set is the right container.
    poison = set()
    graph = {}
    for allergen, ingredient_sets in allergens_map.items():
        candidates = set.intersection(*ingredient_sets)
        graph[allergen] = candidates
        poison.update(candidates)

    safe_occurrences = sum(
        1
        for food in all_ingred
        for ingredient in food
        if ingredient not in poison
    )
    print(safe_occurrences)

    # https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm
    # NOTE(review): keys_only=True is presumed to restrict the result to the
    # keys of ``graph`` (the allergens) -- confirm against the library.
    matching = HopcroftKarp(graph).maximum_matching(keys_only=True)
    print(",".join(matching[key] for key in sorted(matching)))
def _topo_build_index(points):
    """Return a spatial index with one tiny box around every point.

    Entry ``j`` is the box ``points[j][0] / points[j][1]`` padded by 0.00001
    on each side, so ids returned by a query are positions in ``points``.
    """
    pad = 0.00001
    spatial_index = index.Index()
    for j, pt in enumerate(points):
        spatial_index.insert(
            j, (pt[0] - pad, pt[1] - pad, pt[0] + pad, pt[1] + pad))
    return spatial_index


def _topo_pair_matches(marble, hole, threshold):
    """Return True when ``marble`` and ``hole`` are close and similarly oriented."""
    ddd = distance(marble, hole)
    # NOTE(review): elements 2 and 3 look like a direction vector that
    # latlonNorm normalises -- confirm against TOPOWalk / latlonNorm.
    n1 = latlonNorm((marble[2], marble[3]))
    n2 = latlonNorm((hole[2], hole[3]))
    if marble[2] != marble[3] and hole[2] != hole[3]:
        # 1 - |dot product|: 0 when the two normalised vectors are parallel.
        angle_d = 1.0 - abs(n1[0] * n2[0] + n1[1] * n2[1])
    else:
        angle_d = 0.0
    # 0.03 --> 15 degrees, 0.13 --> 30 degrees, 0.29 --> 45 degrees
    return ddd < threshold and angle_d < 0.29


def _topo_count_matches(probes, targets, target_index, threshold,
                        one2oneMatching, probes_are_marbles):
    """Count how many ``probes`` find a matching point among ``targets``.

    With ``one2oneMatching`` the count is the size of a maximum bipartite
    matching over the accepted pairs; otherwise every accepted pair counts.
    ``probes_are_marbles`` tells which side is the marble so the pair test
    always receives its arguments as (marble, hole).
    """
    search_r = threshold * 1.8  # half-width of the candidate search box
    bigraph = {}  # probe point -> Set of accepted target ids
    pair_count = 0
    for probe in probes:
        box = (probe[0] - search_r, probe[1] - search_r,
               probe[0] + search_r, probe[1] + search_r)
        for target_id in target_index.intersection(box):
            target = targets[target_id]
            if probes_are_marbles:
                accepted = _topo_pair_matches(probe, target, threshold)
            else:
                accepted = _topo_pair_matches(target, probe, threshold)
            if not accepted:
                continue
            # Direct dict membership: ``in bigraph.keys()`` would build and
            # scan a key list for every pair under Python 2.
            if probe in bigraph:
                bigraph[probe].add(target_id)
            else:
                bigraph[probe] = Set([target_id])
            pair_count += 1

    if one2oneMatching:
        matches = HopcroftKarp(bigraph).maximum_matching()
        # Halved as in the original code; presumably the result lists every
        # matched pair from both sides -- confirm against the library.
        return len(matches) // 2
    return pair_count


def TOPOWithPairs(GPSMap, OSMMap, GPSList, OSMList, step=0.00005, r=0.00300,
                  threshold=0.00015, region=None, outputfile="tmp.txt",
                  one2oneMatching=True, metaData=None):
    """Score paired start locations on two maps with precision and recall.

    For every key in ``GPSList`` a walk is started on ``GPSMap`` (producing
    "marbles") and on ``OSMMap`` (producing "holes").  Precision is the
    fraction of marbles matched against the bidirectional OSM walk; recall is
    the fraction of holes (walk with ``metaData``) matched against the
    marbles.  Pairs with an empty marble or hole list are skipped.  One line
    per scored pair and a final summary are appended to ``outputfile``, and
    progress is printed.

    Args:
        GPSMap: object providing ``TOPOWalk``; also handed to ``TOPO121``.
        OSMMap: object providing ``TOPOWalk``.
        GPSList: mapping key -> item; item[1..4] are passed as
            nid1, nid2, dist1, dist2 of the GPS walk.
        OSMList: indexable by the keys of ``GPSList``; item[0], item[1] are
            reported as lat/lon and item[2..5] are passed as
            nid1, nid2, dist1, dist2 of the OSM walks.
        step: forwarded to ``TOPOWalk``.
        r: forwarded to ``TOPOWalk``.
        threshold: maximum ``distance`` for a marble/hole pair to match;
            candidates are fetched from a box of half-width 1.8 * threshold.
        region: unused; kept for interface compatibility.
        outputfile: path of the text file that results are appended to.
        one2oneMatching: when true, count a maximum one-to-one matching;
            otherwise count every accepted pair.
        metaData: forwarded to the OSM walk used for recall only.

    Returns:
        Whatever ``TOPO121(returnResult, GPSMap)`` returns, where each entry
        of ``returnResult`` is
        ``(lat, lon, precision, recall, gpsn1, gpsn2, gpsd1, gpsd2)``.
    """
    i = 0  # number of pairs scored so far (skipped pairs are not counted)
    precision_sum = 0
    recall_sum = 0

    print(len(OSMList), len(GPSList.keys()))
    rrr = float(len(GPSList.keys())) / float(len(OSMList))
    print("Overall Coverage", rrr)

    returnResult = []

    # ``iteritems`` and ``Set`` are the Python 2 spellings this code uses.
    for k, itemGPS in GPSList.iteritems():
        itemOSM = OSMList[k]

        gpsn1, gpsn2, gpsd1, gpsd2 = (itemGPS[1], itemGPS[2],
                                      itemGPS[3], itemGPS[4])
        osmn1, osmn2, osmd1, osmd2 = (itemOSM[2], itemOSM[3],
                                      itemOSM[4], itemOSM[5])
        lat, lon = itemOSM[0], itemOSM[1]

        ts1 = time()
        marbles = GPSMap.TOPOWalk(1, step=step, r=r, direction=False,
                                  newstyle=True, nid1=gpsn1, nid2=gpsn2,
                                  dist1=gpsd1, dist2=gpsd2)
        # for recall (original note: remove holes in tunnel)
        holes = OSMMap.TOPOWalk(1, step=step, r=r, direction=False,
                                newstyle=True, nid1=osmn1, nid2=osmn2,
                                dist1=osmd1, dist2=osmd2, metaData=metaData)
        # for precision (original note: don't remove holes in tunnel)
        holes_bidirection = OSMMap.TOPOWalk(
            1, step=step, r=r, direction=False, newstyle=True,
            nid1=osmn1, nid2=osmn2, dist1=osmd1, dist2=osmd2,
            bidirection=True, metaData=None)
        ts2 = time()

        print(i, len(marbles), len(holes))
        # Nothing to score: skip before doing any indexing or matching.
        if len(marbles) == 0 or len(holes) == 0:
            continue

        idx_marbles = _topo_build_index(marbles)
        idx_holes_bidirection = _topo_build_index(holes_bidirection)

        matched = _topo_count_matches(marbles, holes_bidirection,
                                      idx_holes_bidirection, threshold,
                                      one2oneMatching, True)
        item_precision = float(matched) / len(marbles)

        matched = _topo_count_matches(holes, marbles, idx_marbles, threshold,
                                      one2oneMatching, False)
        item_recall = float(matched) / len(holes)

        precision_sum += item_precision
        recall_sum += item_recall
        ts3 = time()

        # Output labels (including their spelling) are kept byte-for-byte;
        # other tools may parse this file.
        with open(outputfile, "a") as fout:
            fout.write(
                str(i) + " " + str(lat) + " " + str(lon) + " " +
                str(gpsn1) + " " + str(gpsn2) +
                " Precesion " + str(item_precision) +
                " Recall " + str(item_recall) +
                " Avg Precesion " + str(precision_sum / (i + 1)) +
                " Avg Recall " + str(recall_sum / (i + 1)) + " \n")

        print(i, "Precesion", item_precision, "Recall", item_recall,
              "Avg Precesion", precision_sum / (i + 1),
              "Avg Recall", recall_sum / (i + 1),
              rrr, ts2 - ts1, ts3 - ts2)

        returnResult.append((lat, lon, item_precision, item_recall,
                             gpsn1, gpsn2, gpsd1, gpsd2))
        i = i + 1

    new_topoResult = TOPO121(returnResult, GPSMap)

    topo_avg = topoAvg(new_topoResult)
    kept_ratio = len(new_topoResult) / float(len(OSMList))
    print(topo_avg, kept_ratio)

    # A separate name for the average recall so the radius parameter ``r``
    # is not overwritten.
    p, avg_recall = topo_avg
    overall_recall = avg_recall * len(new_topoResult) / float(len(OSMList))
    summary = "precision=" + str(p) + " overall-recall=" + str(overall_recall)
    print(summary)

    try:
        with open(outputfile, "a") as fout:
            fout.write(str(p) + " " + str(avg_recall) + " " +
                       str(kept_ratio) + " " + str(overall_recall) + "\n")
            fout.write(summary)
    except (IOError, OSError):
        # Best-effort fallback kept from the original: record a zero line if
        # the summary could not be written.  Only I/O failures are handled;
        # anything else propagates.
        with open(outputfile, "a") as fout:
            fout.write("0 0 0 0\n")

    return new_topoResult
# calculate binary accuracy
# NOTE(review): the inputs and printed labels are the "multi" counts --
# confirm that "binary" in the heading above is intended.
precision_multi = precision(true_positive_multi, false_positive_multi)
recall_multi = recall(true_positive_multi, false_negative_multi)
f_measure_multi = f_measure(true_positive_multi, false_positive_multi,
                            false_negative_multi)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Multi precision: ' + str(precision_multi))
print('Multi recall: ' + str(recall_multi))
print('Multi f-score: ' + str(f_measure_multi))

# calculate accuracies by label type
class_accuracies = {}
label_types = ['CurbRamp', 'SurfaceProblem', 'Obstacle', 'NoCurbRamp']
for label_type in label_types:
    gt_this_label = ground_truth[ground_truth.type == label_type]
    turk_this_label = turker_labels[turker_labels.type == label_type]

    # A ground-truth label is a true positive when its id is a key of
    # multi_matching; the other two counts are derived from it.
    true_pos = sum(gt_this_label.id.isin(multi_matching.keys()))
    false_pos = len(turk_this_label) - true_pos
    false_neg = len(gt_this_label) - true_pos

    class_accuracies[label_type] = {
        'precision': precision(true_pos, false_pos),
        'recall': recall(true_pos, false_neg),
        'f-score': f_measure(true_pos, false_pos, false_neg),
    }

# One row per label type, one column per metric, plus the label type itself
# as a categorical column.
class_accuracies_df = pd.DataFrame.from_dict(class_accuracies, orient='index')
class_accuracies_df['label_type'] = pd.Series(
    class_accuracies_df.index,
    index=class_accuracies_df.index).astype('category')
print(class_accuracies_df)

# plot the accuracies by class as a bar chart