def parse_swim(self, filename):
    """ Parse a SWIM trace.

    Reads ``filename`` line by line, splits each line on single spaces and
    drops empty components with ``self.remove_empty``. Components 2 and 3
    identify the pair (they are handed to ``Encounter``) and component 0 is
    the timestamp of the line (it is converted with ``float``).

    The first time a pair is seen only its timestamp is remembered. Every
    later line for the same pair emits one record::

        <comps[2]> <comps[3]> <time> <previous time> <time - previous time>
        <comps[4]> <comps[5]> <comps[6]> <comps[7]>

    and increments ``self.parsedfilesize``.

    Fix: the third field used to repeat ``comps[3]`` (the second node id)
    instead of the timestamp whose difference to the previous one is
    written as the fifth field.

    Returns:
        The name of the generated file, as given by
        ``self.generate_filename(filename)``.
    """
    self.collect_maxes_swim(filename)
    output_file = self.generate_filename(filename)
    with open(output_file, 'w') as out:
        # Pair key -> timestamp of the last line seen for that pair.
        encounters = {}
        bar = Bar(self.filesize, "Parsing SWIM file")
        with open(filename, 'r') as entrada:
            for line in entrada:
                bar.progress()
                comps = self.remove_empty(line.strip().split(" "))
                key = str(Encounter(comps[2], comps[3]))
                now = float(comps[0])
                if key in encounters:
                    e = encounters[key]
                    self.parsedfilesize += 1
                    out.write("{} {} ".format(comps[2], comps[3]))
                    # End time first, then the remembered previous time.
                    out.write("{} {} ".format(now, e))
                    out.write("{} ".format(now - e))
                    out.write("{} {} ".format(comps[4], comps[5]))
                    out.write("{} ".format(comps[6]))
                    out.write("{}\n".format(comps[7]))
                # Always remember the latest timestamp, matched or not.
                encounters[key] = now
        bar.finish()
    return output_file
def extractVenues(self):
    """Pick random known locations and store them as venues.

    The number of venues is ``maxX * maxY / r**2`` truncated to an int and
    capped at 835. Each venue is drawn uniformly, with replacement, from
    the keys of ``self.locations`` and stored in ``self.venues`` under the
    consecutive indices ``0 .. numberVenues - 1``.

    When ``self.locations`` is empty no venue is stored; previously
    ``random.randint(0, -1)`` raised ``ValueError`` in that case.
    """
    numberVenues = min(835, int(self.maxX * self.maxY / (self.r * self.r)))
    candidates = list(self.locations.keys())
    bar = Bar(numberVenues, "Extracting venues")
    if candidates:
        lastIndex = len(candidates) - 1
        for venuesIndex in range(numberVenues):
            # The original re-drew while the pick was "not in
            # self.locations", which can never happen because the
            # candidates are exactly the keys of that dict.
            # NOTE(review): if venues were meant to be distinct, this needs
            # sampling without replacement - confirm the intent.
            self.venues[venuesIndex] = candidates[random.randint(0, lastIndex)]
            bar.progress()
    bar.finish()
def extractLocations(self):
    """Assign an index to every distinct location string in ``self.file``.

    Each line is split on single spaces. Fields 5-6 and fields 7-8 are each
    joined into a ``"<a> <b>"`` key (presumably the coordinates of the two
    nodes of a contact - confirm against the trace writer). A key that is
    not yet in ``self.locations`` is stored there with the current
    ``self.locationsIndex``, which is then incremented.
    """
    bar = Bar(self.filesize / 2, "Extracting locations")
    with open(self.file, 'r') as entrada:
        for raw in entrada:
            fields = raw.strip().split(" ")
            # First pair, then second pair, so indices are handed out in
            # the same order as the fields appear on the line.
            for first, second in ((5, 6), (7, 8)):
                key = "{} {}".format(fields[first], fields[second])
                if key not in self.locations:
                    self.locations[key] = self.locationsIndex
                    self.locationsIndex += 1
            bar.progress()
    bar.finish()
def best_fit_distribution(self, data, filename, bins=200):
    """ Computes and returns the distribution that best fits the data.

    A density histogram of ``data`` is built with ``bins`` bins. Each
    candidate distribution is fitted to ``data`` and its PDF, evaluated at
    the bin centres, is compared with the histogram through the sum of
    squared errors (SSE). Candidates are ranked by the least-squares form
    of the Akaike information criterion::

        n * ln(SSE / n) + 2 * (number of fitted parameters + 1)

    where ``n`` is the number of bins; the lowest score wins.

    Fix: the previous score was ``-2 * ln(SSE) + 2k``, which *decreases* as
    the SSE grows, so the candidate with the worst fit was selected. It
    also crashed on an SSE of exactly 0 and let an infinite SSE win.

    Args:
        data: sample to fit; passed to ``np.histogram`` and to each
            distribution's ``fit``.
        filename: only used to build the progress-bar label (its second
            ``os.sep``-separated component, or the whole string when it
            contains no separator).
        bins: number of histogram bins.

    Returns:
        ``(name, params)`` of the best candidate. Falls back to
        ``("norm", (0.0, 1.0))`` when no candidate produces a usable score.
    """
    y, x = np.histogram(data, bins=bins, density=True)
    # Convert the bin edges into bin centres so that x lines up with y.
    x = (x + np.roll(x, -1))[:-1] / 2.0

    DISTRIBUTIONS = [
        st.dweibull, st.expon, st.gamma, st.logistic,
        st.lognorm, st.norm, st.pareto
    ]

    best_distribution = st.norm
    best_params = (0.0, 1.0)
    best_sse = np.inf

    if os.sep in filename:
        metric = filename.split(os.sep)[1]
    else:
        metric = filename

    progressbar = Bar(len(DISTRIBUTIONS), "Fitting {}".format(metric))
    # Fitting is noisy; as before, the filter is installed process-wide and
    # stays active after this method returns.
    warnings.filterwarnings('ignore')
    n_points = len(y)
    for distribution in DISTRIBUTIONS:
        params = distribution.fit(data)
        # fit() returns (shape parameters..., loc, scale).
        arg = params[:-2]
        loc = params[-2]
        scale = params[-1]

        pdf = distribution.pdf(x, *arg, loc=loc, scale=scale)
        residual = np.sum(np.power(y - pdf, 2.0))
        if not np.isfinite(residual):
            # A NaN/inf error means the fit is unusable: never select it.
            sse = np.inf
        elif residual <= 0.0:
            # Perfect match; log(0) is undefined, so rank it first.
            sse = -np.inf
        else:
            sse = (n_points * math.log(residual / n_points)
                   + 2 * (len(params) + 1))

        if sse < best_sse:
            best_distribution = distribution
            best_params = params
            best_sse = sse
        # The value labelled "SSE" here is the AIC score computed above.
        print(" SSE of {} is {}(Current best: {})".format(
            distribution.name, round(sse, 2), round(best_sse, 2)), end="")
        progressbar.progress()
    print(" Fit to {} with params [{}]".format(best_distribution.name,
                                               best_params), end="")
    progressbar.finish()
    return (best_distribution.name, best_params)
def extract(self):
    """Run every extraction stage over the trace in ``self.file``.

    Stages, in order: locations, venues, the venue-based pass
    (``voronoi``), homes (one ``extractHome`` call per stripped line),
    per-line metrics (``extractMetrics``), the topological overlap of every
    edge of ``self.topoGraph`` when "TOPO" is requested, the EDGEP
    normalisation and SOCOR extraction when requested, and finally
    ``printMetrics``.
    """
    self.extractLocations()
    self.extractVenues()
    self.voronoi()

    progress = Bar(self.filesize / 2, "Extracting homes")
    with open(self.file, "r") as entrada:
        for raw in entrada:
            self.extractHome(raw.strip())
            progress.progress()
    progress.finish()

    progress = Bar(self.filesize / 2,
                   "Extracting INCO, CODU, MAXCON and EDGEP")
    with open(self.file, "r") as entrada:
        for raw in entrada:
            progress.progress()
            self.extractMetrics(self.metrics, raw.strip())
    # Read the edges only after the metrics pass has gone over the trace.
    edges = self.topoGraph.edgeSet()
    progress.finish()

    if "TOPO" in self.metrics:
        progress = Bar(len(edges), "Extracting TOPO and SOCOR")
        for edge in edges:
            progress.progress()
            src, trg = edge.src, edge.target
            key = Encounter(int(src), int(trg)).toString()
            if key not in self.totalNeighbors:
                self.totalNeighbors[key] = []

            neighborsSrc = self.topoGraph.get_vertex(src).get_connections()
            neighborsTrg = self.topoGraph.get_vertex(trg).get_connections()
            # 1 when the pair itself is connected, so that the edge between
            # them is not counted in either degree.
            exists = 1 if self.topoGraph.containsEdge(src, trg) else 0
            # Neighbours of the target that are also neighbours of the source.
            shared = sum(1 for t in neighborsTrg if t in neighborsSrc)

            denominator = ((len(neighborsSrc) - exists)
                           + (len(neighborsTrg) - exists) - shared) + 1
            if denominator == 0:
                denominator = 1
            self.topo[key] = (float(shared) + 1) / denominator
        progress.finish()

    if "EDGEP" in self.metrics:
        self.normalizeEDGEP()
    if "SOCOR" in self.metrics:
        self.extractSOCOR()
    self.printMetrics(self.metrics)
def parseSwim(self, filename):
    """Parse a SWIM trace into the contact format used by the extractor.

    Each line of ``filename`` is split on single spaces and cleaned with
    ``self.removeEmpty``. Components 2 and 3 identify the pair (they are
    handed to ``Encounter``) and component 0 is the timestamp of the line
    (it is converted with ``float``).

    The first line of a pair only records its timestamp. Every later line
    of the same pair writes::

        <comps[2]> <comps[3]> <time> <previous time> <time - previous time>
        <comps[4]> <comps[5]> <comps[6]> <comps[7]>

    and increments ``self.parsedfilesize``.

    Fix: the third field used to repeat ``comps[3]`` (the second node id)
    instead of the timestamp whose difference to the previous one is
    written as the fifth field.

    Returns:
        The output file name from ``self.generateFileName(filename)``.
    """
    self.collectMaxesSwim(filename)
    newFile = self.generateFileName(filename)
    with open(newFile, 'w') as out:
        # Pair key -> timestamp of the last line seen for that pair.
        encounters = {}
        bar = Bar(self.filesize, "Parsing SWIM file")
        with open(filename, 'r') as entrada:
            for line in entrada:
                bar.progress()
                comps = self.removeEmpty(line.strip().split(" "))
                key = Encounter(comps[2], comps[3]).toString()
                now = float(comps[0])
                if key in encounters:
                    e = encounters[key]
                    self.parsedfilesize += 1
                    out.write("{} {} ".format(comps[2], comps[3]))
                    # End time first, then the remembered previous time.
                    out.write("{} {} ".format(now, e))
                    out.write("{} ".format(now - e))
                    out.write("{} {} ".format(comps[4], comps[5]))
                    out.write("{} ".format(comps[6]))
                    out.write("{}\n".format(comps[7]))
                # Always remember the latest timestamp, matched or not.
                encounters[key] = now
        bar.finish()
    return newFile
def naive_raw(self, filename):
    """ Parse a raw trace not considering the cells.

    Every line of ``filename`` is decoded by ``self.get_line`` into
    ``(_id, x, y, time)``. The reporting node is compared against the last
    known position of every other node whose ``x`` is not ``inf``:

    * within ``self.r`` and no open contact: a contact is opened for both
      directions, remembering the time and both positions;
    * farther than ``self.r`` with an open contact: one line is written,
      ``<id> <other> <time> <begin> <duration> <x> <y> <other x> <other y>``,
      and the contact is removed in both directions.

    After each line, nodes whose last report is older than
    ``self.stayTime`` get their position set to ``inf``. When the input
    ends, the contacts still open are written once per pair, ordered by
    their starting time, using the time of the last line as end time.

    Returns:
        The output file name from ``self.generate_filename(filename)``.
    """
    self.pre_parse_raw(filename)
    max_range = self.r
    open_contacts = {}
    last_seen = {}
    output_filename = self.generate_filename(filename)

    def render(first, second, end, record):
        # One output line for a contact that ends at ``end``.
        start, own_x, own_y, peer_x, peer_y = record
        return ("{} {} " "{} {} {} " "{} {} " "{} {}\n").format(
            first, second, end, start, end - start,
            own_x, own_y, peer_x, peer_y)

    with open(output_filename, "w+") as saida, \
            open(filename, "r") as entrada:
        bar = Bar(self.filesize, "Parsing RAW file")
        for raw in entrada:
            bar.progress()
            _id, x, y, time = self.get_line(raw)
            open_contacts.setdefault(_id, {})
            here = PositionReport(x, y, time)
            last_seen[_id] = here

            for other_id, there in last_seen.items():
                # Skip the node itself and nodes whose position expired.
                if other_id == _id or there.x == inf:
                    continue
                in_contact = (other_id in open_contacts[_id]
                              or _id in open_contacts[other_id])
                gap = here - there
                if gap <= max_range and not in_contact:
                    # Beginning a new contact
                    open_contacts[_id][other_id] = (
                        time, x, y, there.x, there.y)
                    open_contacts[other_id][_id] = (
                        time, there.x, there.y, x, y)
                elif gap > max_range and in_contact:
                    # Ending an existing contact
                    saida.write(render(_id, other_id, time,
                                       open_contacts[_id][other_id]))
                    del open_contacts[_id][other_id]
                    del open_contacts[other_id][_id]

            # Expire nodes that have not reported for too long.
            for report in last_seen.values():
                if time - report.t > self.stayTime:
                    report.x = inf
                    report.y = inf

        # At this point, the trace has ended, but we still need to close
        # open contacts. They are collected first so that they can be
        # sorted by starting time.
        pending = []
        # Each contact is stored in both directions; report it only once.
        reported = set()
        for _id, partners in open_contacts.items():
            for other_id, record in partners.items():
                if (_id, other_id) in reported:
                    continue
                pending.append(
                    (render(_id, other_id, time, record), record[0]))
                reported.add((_id, other_id))
                reported.add((other_id, _id))
        pending.sort(key=itemgetter(1))
        for text, _ in pending:
            saida.write(text)
        bar.finish()
    return output_filename
def voronoi(self):
    """Extract the per-user tables behind SPAV, CONEN and VIST.

    Does nothing unless at least one of "SPAV", "VIST" or "CONEN" is in
    ``self.metrics``. Otherwise every line of ``self.file`` is split on
    single spaces and read as: fields 0 and 1 are the two users, field 4 is
    a float added to the visit times, and fields 5-6 / 7-8 are the float
    coordinates used for user 1 / user 2.

    * CONEN: ``self.usersContacts[user][other]`` counts lines per pair.
    * SPAV: ``self.usersVenues[user][venue]`` counts lines per closest venue.
    * VIST: ``self.vist[user][venue]`` accumulates field 4 per closest venue.

    The closest venue is the index into ``self.venues`` with the smallest
    ``self.euclideanDistance`` to the user's coordinates (0 when there are
    no venues).

    Fix: the visit time of user 2 was read from ``user2Venue`` but stored
    under ``user1Venue``; it is now stored under ``user2Venue``.
    """
    wantConen = "CONEN" in self.metrics
    wantSpav = "SPAV" in self.metrics
    wantVist = "VIST" in self.metrics
    if not (wantConen or wantSpav or wantVist):
        return

    def accumulate(table, user, key, amount):
        # NOTE(review): the first line on which a user appears only creates
        # its empty map, so that first observation is not counted. Kept
        # exactly as the original behaves - confirm whether it is intended.
        if user in table:
            table[user][key] = table[user].get(key, 0) + amount
        else:
            table[user] = {}

    # Venue strings do not change while the trace is read, so parse them
    # once instead of once per line.
    venueCoords = []
    if wantSpav or wantVist:
        for i in range(len(self.venues)):
            splitVe = [float(c) for c in self.venues[i].split(" ")]
            venueCoords.append((splitVe[0], splitVe[1]))

    with open(self.file, 'r') as entrada:
        bar = Bar(self.filesize / 2, "Extracting SPAV, CONEN and VIST")
        for line in entrada:
            split = line.strip().split(" ")
            user1 = split[0]
            user1X, user1Y = float(split[5]), float(split[6])
            user2 = split[1]
            user2X, user2Y = float(split[7]), float(split[8])
            time = float(split[4])

            if wantConen:
                accumulate(self.usersContacts, user1, user2, 1)
                accumulate(self.usersContacts, user2, user1, 1)

            # Closest venue of each user; stays 0 when no venue is known.
            distanceToCloser1 = distanceToCloser2 = sys.float_info.max
            user1Venue = user2Venue = 0
            for i, (venueX, venueY) in enumerate(venueCoords):
                dist = self.euclideanDistance(user1X, user1Y,
                                              venueX, venueY)
                if dist < distanceToCloser1:
                    distanceToCloser1 = dist
                    user1Venue = i
                dist = self.euclideanDistance(user2X, user2Y,
                                              venueX, venueY)
                if dist < distanceToCloser2:
                    distanceToCloser2 = dist
                    user2Venue = i

            if wantSpav:
                accumulate(self.usersVenues, user1, user1Venue, 1)
                accumulate(self.usersVenues, user2, user2Venue, 1)

            if wantVist:
                accumulate(self.vist, user1, user1Venue, time)
                # Stored under user 2's own venue (was user1Venue).
                accumulate(self.vist, user2, user2Venue, time)
            bar.progress()
        bar.finish()
def parseRaw(self, filename):
    """Parse a raw position trace into contact records using a cell grid.

    Every input line is split on single spaces into ``id x y time``. The
    position is mapped to the grid cell ``floor(x / r), floor(y / r)`` and
    nodes are kept in per-cell lists so that only nearby nodes are compared.
    Open contacts are edges of a ``Graph``. A pair within ``self.r`` gets an
    edge in both directions, and its positions are remembered when the
    contact starts. A connected pair that is found farther apart gets one
    line from ``self.generateEntry`` written, and its edges are removed.

    * Known node that moved: it is relocated to its new cell and compared
      with the users returned by ``self.getAdjacentCellUsers`` for its
      previous position.
    * Node seen for the first time (or any failure while handling a known
      node, as in the original): it is registered and compared with every
      user of the cells around its own cell.

    ``self.filesize`` is set to the number of lines written.

    Fixes:
        * the adjacent users were assigned to ``adj`` but iterated as
          ``adjacent``; the resulting ``NameError`` was swallowed by a bare
          ``except`` so every move was handled as a brand-new node;
        * the start of the Y range was reset based on ``coordX`` instead of
          ``coordY`` after the first column of cells;
        * bare ``except:`` clauses no longer swallow ``KeyboardInterrupt``
          and ``SystemExit``.

    Returns:
        The output file name from ``self.generateFileName(filename)``.
    """
    self.preParseRaw(filename)
    cells = {}
    g = Graph()
    positionDictionary = {}
    # Encounter key -> "x1 y1 x2 y2" at the moment the contact started.
    beginingPositions = {}
    with open(self.generateFileName(filename), 'w+') as out:
        newLines = 0
        bar = Bar(self.filesize, "Parsing RAW file")
        with open(filename) as entrada:
            for line in entrada:
                bar.progress()
                components = line.split(" ")
                _id = int(components[0])
                posX = float(components[1])
                posY = float(components[2])
                coordX = math.floor(posX / self.r)
                coordY = math.floor(posY / self.r)
                time = float(components[3])
                user = User(_id, posX, posY)
                u1 = user.toString()
                u1x, u1y = user.x, user.y
                try:
                    entry = positionDictionary[u1]
                    entryX, entryY = entry.positionX, entry.positionY
                    if entryX != posX or entryY != posY:
                        # The node moved: take it out of its previous cell.
                        oldKey = Cell(entry.coordX, entry.coordY).toString()
                        cells[oldKey] = self.removeUserFromCell(
                            cells[oldKey], u1)
                        oldUser = User(_id, entryX, entryY)
                        adjacent = self.getAdjacentCellUsers(
                            cells, oldUser, self.r)
                        newKey = Cell(coordX, coordY).toString()
                        cells.setdefault(newKey, []).append(user)
                        for user2 in adjacent:
                            u2 = user2.toString()
                            if u1 == u2:
                                # Same guard as the new-node branch below.
                                continue
                            u2x, u2y = user2.x, user2.y
                            euc = self.euclidean(u1x, u1y, u2x, u2y)
                            if euc <= self.r:
                                vert1 = g.get_vertex(u1)
                                conected = any(
                                    vert2.get_id() == u2
                                    for vert2 in vert1.get_connections())
                                g.add_edge(u1, u2, time)
                                g.add_edge(u2, u1, time)
                                if not conected:
                                    # New contact: remember where it began.
                                    enc = Encounter(
                                        int(u1), int(u2)).toString()
                                    beginingPositions[enc] = " ".join(
                                        str(v) for v in (u1x, u1y, u2x, u2y))
                            elif g.containsEdge(u1, u2):
                                # Out of range while connected: close it.
                                enc = Encounter(int(u1), int(u2)).toString()
                                beginPos = beginingPositions[enc]
                                out.write(self.generateEntry(
                                    user, user2, time, g, beginPos))
                                newLines += 1
                                g.remove_edge(u1, u2)
                                g.remove_edge(u2, u1)
                    positionDictionary[u1] = PositionEntry(
                        posX, posY, coordX, coordY, time)
                except Exception:
                    # Expected case: KeyError for a node never seen before.
                    # As in the original, any other failure above also ends
                    # up here and the node is (re-)registered.
                    positionDictionary[u1] = PositionEntry(
                        posX, posY, coordX, coordY, time)
                    g.add_vertex(u1)
                    cells.setdefault(
                        Cell(coordX, coordY).toString(), []).append(user)

                    # Scan the 3x3 block of cells around the node, clamped
                    # at 0 on both axes.
                    rangeXBegin = coordX - 1 if coordX - 1 > 0 else 0
                    firstY = coordY - 1 if coordY - 1 > 0 else 0
                    while rangeXBegin <= coordX + 1:
                        rangeYBegin = firstY
                        while rangeYBegin <= coordY + 1:
                            newCell = Cell(rangeXBegin, rangeYBegin)
                            try:
                                usersInCell = cells[newCell.toString()]
                                for user2 in usersInCell:
                                    u2 = user2.toString()
                                    if u1 == u2:
                                        continue
                                    u2x, u2y = user2.x, user2.y
                                    eu = self.euclidean(u1x, u1y, u2x, u2y)
                                    if eu <= self.r:
                                        known = g.containsEdge(u1, u2)
                                        g.add_edge(u1, u2, time)
                                        g.add_edge(u2, u1, time)
                                        if not known:
                                            enc = Encounter(
                                                int(u1), int(u2)).toString()
                                            beginingPositions[enc] = " ".join(
                                                str(v)
                                                for v in (u1x, u1y, u2x, u2y))
                                    elif g.containsEdge(u1, u2):
                                        enc = Encounter(
                                            int(u1), int(u2)).toString()
                                        beginPos = beginingPositions[enc]
                                        out.write(self.generateEntry(
                                            user, user2, time, g, beginPos))
                                        newLines += 1
                                        g.remove_edge(u1, u2)
                                        g.remove_edge(u2, u1)
                            except Exception:
                                # Best effort, as in the original: an empty
                                # cell (KeyError) or a failure on one
                                # neighbour skips the rest of this cell.
                                pass
                            rangeYBegin += 1
                        rangeXBegin += 1
        self.filesize = newLines
        bar.finish()
    return self.generateFileName(filename)