def SnapPOIs(id2Node, nodeIds, datDirectory):
    """Snap dog-friendly POIs onto their nearest nodes.

    Reads ``<datDirectory>/dogOKs.dat`` (one entry per line, compared against
    each business' ``url``) and, for every type in the module-level
    ``poiTypes``, ``<datDirectory>/<poiType>.json``. Each accepted business
    is attached to the node returned by ``NearestNode``.

    Args:
        id2Node: Mapping of node id to node object; the matched node's
            ``poiIds`` list is appended to in place.
        nodeIds: Candidate node ids handed to ``NearestNode``.
        datDirectory: Directory holding ``dogOKs.dat`` and the POI .json files.

    Returns:
        Dict mapping POI id (from ``POIHash``) to its ``POI`` object.
    """
    # Grab dog-friendly POI's; a set keeps the per-business lookup O(1)
    dogOKFileName = '{}/dogOKs.dat'.format(datDirectory)
    with open(dogOKFileName) as f:
        dogOKs = {dogOK.strip() for dogOK in f}

    # Iterate over POI types
    id2Poi = {}
    for poiType in poiTypes:
        PrintNow('Snapping {} to intersections ...'.format(poiType))

        # POI specifics: indices to skip; a type without an entry skips nothing
        skip = poiType2Skip.get(poiType) or ()

        # Read POI .json
        jsonFileName = '{}/{}.json'.format(datDirectory, poiType)
        PrintNow('Reading {:s} ... '.format(jsonFileName), end='')
        with open(jsonFileName, 'r') as f:
            payload = JSONLoad(f.read())
        PrintNow('done')

        # Iterate over businesses
        businesses = payload.get('businesses')
        length = len(businesses)
        for index, business in enumerate(businesses):
            # Skip junk data and dog-unfriendly
            if index in skip or business.get('url') not in dogOKs:
                continue

            poiId = POIHash(business, poiType)

            # Kick out ill-defined POI's (no location block, or no coordinates)
            location = business.get('location') or {}
            latlng = location.get('latlng')
            if latlng is None:
                continue

            # Attach ...
            PrintNow('{:4d}/{:4d}:\t{} .. '.format(index + 1, length, business.get('name')), end='')
            latitude, longitude = latlng
            nodeId, offset = NearestNode(latitude, longitude, nodeIds, id2Node)
            PrintNow('to {}'.format(nodeId))

            # ... POI onto node ...
            id2Node.get(nodeId).poiIds.append(poiId)

            # ... and node onto POI (the same POI id may be seen more than once)
            poi = id2Poi.get(poiId)
            if poi is not None:
                poi.nodeIds.append(nodeId)
                poi.offsets.append(offset)
            else:
                id2Poi[poiId] = POI(business, poiType, [nodeId], [offset])

    PrintNow('Added {:d} POIs'.format(len(id2Poi)))
    return id2Poi
def SnapTrees(id2Node, id2Edge, graphIds, datDirectory):
    """Snap trees to their nearest node and count them on that node's edges.

    Reads ``<datDirectory>/<treeFileName>.json`` (``treeFileName`` is a
    module-level name) and walks the rows under its ``data`` key. In each row,
    index 0 is used as the tree id, index 10 as the variety and indices 23/24
    as latitude/longitude. Rows missing either coordinate are dropped.

    Args:
        id2Node: Mapping of node id to node object (``latitude``/``longitude``
            attributes are read).
        id2Edge: Mapping of edge id to edge object; ``treeCount`` is
            incremented in place on every edge of the matched node.
        graphIds: Container of node ids; only edges touching at least one of
            them are considered.
        datDirectory: Directory holding the tree .json file.

    Returns:
        Dict mapping tree id to its ``Tree`` object.
    """
    id2Tree = {}
    PrintNow('Snapping trees to edges ...')

    # Map nodeId to edgeIds
    nodeId2EdgeIds = {}
    for edgeId, edge in id2Edge.items():
        # Kick out non-subgraph edges
        if not any(nodeId in graphIds for nodeId in edge.nodeIds):
            continue
        for nodeId in edge.nodeIds:
            # Kick out missing nodes
            if nodeId not in id2Node:
                continue
            nodeId2EdgeIds.setdefault(nodeId, []).append(edgeId)

    # Order nodes by position
    nodeIds = sorted(
        nodeId2EdgeIds,
        key=lambda nodeId: (id2Node.get(nodeId).latitude, id2Node.get(nodeId).longitude),
    )

    # Read tree .json
    jsonFileName = '{}/{}.json'.format(datDirectory, treeFileName)
    PrintNow('Reading {:s} ... '.format(jsonFileName), end='')
    with open(jsonFileName, 'r') as f:
        payload = JSONLoad(f.read())
    PrintNow('done')

    # Drop trees without coordinates and order the rest by position, so that
    # trees sharing a position are adjacent and can reuse the previous lookup.
    # NOTE(review): the sort key uses the raw row values, which are only
    # converted with float() further down -- confirm their type in the data.
    trees = sorted(
        (tree for tree in payload.get('data') if tree[23] is not None and tree[24] is not None),
        key=lambda tree: (tree[23], tree[24]),
    )
    length = len(trees)

    prevLatLng, prevNodeId = (None, None), None
    for index, treeList in enumerate(trees):
        treeId = treeList[0]
        variety = treeList[10]
        latitude, longitude = treeList[23:25]

        # Snap to a node ...
        PrintNow('{:5d}/{:5d} .. '.format(index + 1, length), end='')
        id2Tree[treeId] = Tree(treeId, variety, latitude, longitude)
        latitude, longitude = float(latitude), float(longitude)
        if prevLatLng == (latitude, longitude):
            # Same position as the previous tree: reuse its node
            nodeId = prevNodeId
        else:
            nodeId, _ = NearestNode(latitude, longitude, nodeIds, id2Node)
        prevLatLng, prevNodeId = (latitude, longitude), nodeId

        # ... grab its edges ...
        edgeIds = nodeId2EdgeIds.get(nodeId)

        # ... and increment them
        PrintNow('to {}'.format(','.join(str(edgeId) for edgeId in edgeIds)))
        for edgeId in edgeIds:
            id2Edge.get(edgeId).treeCount += 1

    PrintNow('Added {:d} trees'.format(len(id2Tree)))
    return id2Tree
# Characters that are unsafe in file/directory names: most become "_",
# quotes are removed outright.
_UNSAFE_NAME_CHARS = str.maketrans({
    "\\": "_",
    "/": "_",
    ":": "_",
    "?": "_",
    '"': None,
    "'": None,
    "<": "_",
    ">": "_",
    "|": "_",
})


def _sanitize_name(raw) -> str:
    """Return *raw* as a stripped, filesystem-safe name of at most 100 chars."""
    return str(raw).strip().translate(_UNSAFE_NAME_CHARS)[:100]


def playlist_videoId(url) -> list:
    """Fetch a playlist page and collect the videos that belong to it.

    :param str url: A valid playlist URL.
    :return list: ``[playlist_title, {videoId: video_name}]`` with both the
        title and the video names sanitized for use as file-system names.
        ``None`` is returned when the page cannot be fetched, when it has no
        ``ytInitialData`` object, or when it reports a blocking alert.
    """
    response = req.get(url)
    if response.status_code != 200:
        print("Não foi possivel coletar informações.")
        print(f"HTTP Code: {response.status_code}")
        return None

    text_response = response.text

    # Locate the JS script holding the ytInitialData object; the links are in it.
    if 'ytInitialData' not in text_response:
        print('Não encontrado objeto ytInitialData no retorno. Verifique')
        return None
    # NOTE(review): +16 presumably skips "ytInitialData = " and -1 the trailing
    # ";" before </script> -- confirm against the current page markup.
    index_init = text_response.find("ytInitialData") + 16
    index_final = text_response.find('</script>', index_init) - 1
    script = text_response[index_init:index_final]

    # ytInitialData is a JavaScript object that can be parsed as JSON. We walk
    # its sub-keys down to the "contents" list, which holds the properties of
    # each video, such as the id used to identify it.
    temp = JSloads(script)

    # If the playlist is not public it cannot be found.
    if "alerts" in temp:
        erro = temp.get("alerts")[0]
        if "alertRenderer" in erro:
            erro1 = erro.get("alertRenderer").get("text").get("runs")[0].get("text")
            print("Não foi possivel encontrar a playlist informada.")
            print(f"Detalhes: {erro1}")
            print("Certifique-se de que a playlist é pública, e tente novamente.")
            return None
        elif "alertWithButtonRenderer" in erro:
            # Non-blocking: some videos are unavailable, keep going.
            print("Alguns vídeos não estão disponiveis. Verifique")
        else:
            print("Erros desconhecidos identificados. Verifique em 'script.js'")
            # Dump the parsed object so the unknown alert can be inspected.
            with open('script.js', 'w') as fl:
                fl.write(JSdumps(temp))
            return None

    # The playlist title becomes the name of the folder created on disk.
    playlist_title = _sanitize_name(
        temp.get("metadata").get("playlistMetadataRenderer").get("title"))

    # The playlist videos live under this path:
    # contents/twoColumnBrowseResultsRenderer/tabs/tabRenderer/content/
    # sectionListRenderer/contents/itemSectionRenderer/contents/
    # playlistVideoListRenderer/contents
    temp1 = temp.get('contents').get("twoColumnBrowseResultsRenderer")
    temp2 = temp1.get('tabs')[0].get("tabRenderer")
    temp3 = temp2.get("content").get("sectionListRenderer").get("contents")[0]
    temp4 = temp3.get("itemSectionRenderer").get("contents")[0].get(
        "playlistVideoListRenderer")
    obj_videos = temp4.get("contents")

    # For testing playlists with more than 100 videos: save the continuation
    # token carried by the trailing entry, when there is one.
    if len(obj_videos) > 100:
        continuation = obj_videos[-1].get("continuationItemRenderer")
        if continuation is not None:
            ctoken = continuation.get("continuationEndpoint").get(
                "continuationCommand").get("token")
            with open('ctoken.txt', 'w') as fl:
                fl.write(ctoken)

    # Store each videoId with its name; the name is used for the file on disk.
    info_videos = {}
    for video_prop in obj_videos:
        renderer = video_prop.get("playlistVideoRenderer")
        if renderer is None:
            # Not a video entry (e.g. the trailing continuationItemRenderer).
            continue
        videoId = renderer.get("videoId")
        info_videos[videoId] = _sanitize_name(
            renderer.get("title").get("runs")[0].get("text"))

    return [playlist_title, info_videos]