Ejemplo n.º 1
0
def SnapPOIs(id2Node, nodeIds, datDirectory):
    """Snap dog-friendly POIs onto their nearest graph nodes.

    For every POI type in the module-level ``poiTypes``, reads
    ``<datDirectory>/<poiType>.json`` and walks its ``businesses`` list.
    A business is kept when its index is not listed in
    ``poiType2Skip[poiType]``, its ``url`` appears in
    ``<datDirectory>/dogOKs.dat`` and it carries a ``location.latlng`` pair.
    Each kept business is attached to the node returned by ``NearestNode``.

    Args:
        id2Node: Mapping of node id to node object. The ``poiIds`` list of
            every node that receives a POI is appended to in place.
        nodeIds: Candidate node ids, passed through to ``NearestNode``.
        datDirectory: Directory holding ``dogOKs.dat`` and the per-type
            ``.json`` files.

    Returns:
        Dict mapping POI id (as computed by ``POIHash``) to its ``POI``
        object. A POI id seen more than once accumulates every node id and
        offset it was snapped to.
    """

    # Grab dog-friendly POI's. A set makes the per-business membership test
    # constant-time instead of rescanning the whole list for every business.

    dogOKFileName = '{}/dogOKs.dat'.format(datDirectory)
    with open(dogOKFileName) as f:
        dogOKs = {line.strip() for line in f}

    # Iterate over POI types

    id2Poi = {}
    for poiType in poiTypes:
        PrintNow('Snapping {} to intersections ...'.format(poiType))

        # POI specifics: a type without a skip entry simply skips nothing
        # (previously a missing entry made `index in skip` raise TypeError).

        skip = poiType2Skip.get(poiType) or ()

        # Read POI .json

        jsonFileName = '{}/{}.json'.format(datDirectory, poiType)
        PrintNow('Reading {:s} ... '.format(jsonFileName), end = '')
        with open(jsonFileName, 'r') as f:
            document = JSONLoad(f.read())
        PrintNow('done')

        # Iterate over businesses

        businesses = document.get('businesses')
        length = len(businesses)

        for index, business in enumerate(businesses):

            # Skip junk data and dog-unfriendly

            if index in skip or business.get('url') not in dogOKs:
                continue

            poiId = POIHash(business, poiType)

            # Kick out ill-defined POI's: a missing 'location' is treated
            # the same way as a missing 'latlng'.

            location = business.get('location') or {}
            latlng = location.get('latlng')
            if latlng is None:
                continue

            # Attach ...

            PrintNow('{:4d}/{:4d}:\t{} .. '.format(index + 1, length, business.get('name')), end = '')

            latitude, longitude = latlng
            nodeId, offset = NearestNode(latitude, longitude, nodeIds, id2Node)

            PrintNow('to {}'.format(nodeId))

            # ... POI onto node ...

            id2Node.get(nodeId).poiIds.append(poiId)

            # ... and node onto POI

            poi = id2Poi.get(poiId)
            if poi is None:
                id2Poi[poiId] = POI(business, poiType, [nodeId], [offset])
            else:
                poi.nodeIds.append(nodeId)
                poi.offsets.append(offset)

    PrintNow('Added {:d} POIs'.format(len(id2Poi)))

    return id2Poi
Ejemplo n.º 2
0
def SnapTrees(id2Node, id2Edge, graphIds, datDirectory):
    """Snap trees onto graph nodes and count them on the adjoining edges.

    Reads ``<datDirectory>/<treeFileName>.json`` (``treeFileName`` is a
    module-level name) and takes its ``data`` list. Each row is indexed
    positionally: column 0 is used as the tree id, column 10 as the variety
    and columns 23/24 as latitude/longitude. Rows lacking either coordinate
    are dropped. Every remaining tree is snapped to the node returned by
    ``NearestNode``, and the ``treeCount`` of every edge touching that node
    is incremented.

    Args:
        id2Node: Mapping of node id to node object (``latitude`` and
            ``longitude`` attributes are read).
        id2Edge: Mapping of edge id to edge object. ``nodeIds`` is read and
            ``treeCount`` is incremented in place.
        graphIds: Container of node ids; an edge is considered only if at
            least one of its nodes is in it.
        datDirectory: Directory holding the tree ``.json`` file.

    Returns:
        Dict mapping tree id to its ``Tree`` object, built from the raw
        (unconverted) coordinate values found in the file.
    """
    id2Tree = {}
    PrintNow('Snapping trees to edges ...')

    # Map nodeId to edgeIds

    nodeId2EdgeIds = {}
    for edgeId, edge in id2Edge.items():

        # Kick out non-subgraph edges

        if not any(nodeId in graphIds for nodeId in edge.nodeIds):
            continue

        for nodeId in edge.nodeIds:

            # Kick out missing nodes

            if nodeId in id2Node:
                nodeId2EdgeIds.setdefault(nodeId, []).append(edgeId)

    # Order nodes by position

    def nodePosition(nodeId):
        node = id2Node.get(nodeId)
        return (node.latitude, node.longitude)

    nodeIds = sorted(nodeId2EdgeIds, key = nodePosition)

    # Read tree .json

    jsonFileName = '{}/{}.json'.format(datDirectory, treeFileName)
    PrintNow('Reading {:s} ... '.format(jsonFileName), end = '')
    with open(jsonFileName, 'r') as f:
        document = JSONLoad(f.read())
    PrintNow('done')

    # Drop ill-defined trees (missing coordinates) and order the rest by
    # position, so trees sharing a position are adjacent and can reuse the
    # previous nearest-node lookup below.

    trees = sorted(
        (tree for tree in document.get('data') if tree[23] is not None and tree[24] is not None),
        key = lambda tree: (tree[23], tree[24]))

    length = len(trees)
    prevLatLng, prevNodeId = (None, None), None
    for index, treeList in enumerate(trees):
        treeId = treeList[0]
        variety = treeList[10]
        latitude, longitude = treeList[23], treeList[24]

        # Snap to a node ...
        # The progress line only shows the counters. The variety slice that
        # used to be passed as an unused third format argument is gone, as it
        # raised TypeError whenever the variety was None.

        PrintNow('{:5d}/{:5d} .. '.format(index + 1, length), end = '')

        id2Tree[treeId] = Tree(treeId, variety, latitude, longitude)
        latitude, longitude = float(latitude), float(longitude)
        if prevLatLng == (latitude, longitude):
            # Same position as the previous tree: reuse its node
            nodeId = prevNodeId
        else:
            nodeId, _ = NearestNode(latitude, longitude, nodeIds, id2Node)
            prevLatLng, prevNodeId = (latitude, longitude), nodeId

        # ... grab its edges ...

        edgeIds = nodeId2EdgeIds.get(nodeId)

        # ... and increment them

        PrintNow('to {}'.format(','.join(str(edgeId) for edgeId in edgeIds)))
        for edgeId in edgeIds:
            id2Edge.get(edgeId).treeCount += 1

    PrintNow('Added {:d} trees'.format(len(id2Tree)))

    return id2Tree
Ejemplo n.º 3
0
# Translation table for names that end up as file/folder names: the first
# group of characters is replaced by "_", the quote characters are removed.
_UNSAFE_NAME_CHARS = str.maketrans('\\/:?<>|', '_______', '"\'')


def _sanitize_name(name, max_length=100):
    """Return *name* with path-unsafe characters replaced and its length capped."""
    return name.translate(_UNSAFE_NAME_CHARS)[:max_length]


def playlist_videoId(url) -> list:
    """Fetch a playlist page and collect the videos that belong to it.

    :param str url: A valid playlist URL.
    :return list: ``[playlist title, dict mapping each videoId to its video
        name]``, both names sanitized for use on the file system. ``None`` is
        returned instead when the request does not answer with HTTP 200, the
        page has no ``ytInitialData`` object, or the page reports an alert
        other than ``alertWithButtonRenderer``.
    """
    response = req.get(url)

    if response.status_code != 200:
        print("Não foi possivel coletar informações.")
        print(f"HTTP Code: {response.status_code}")
        return None

    text_response = response.text
    info_videos = dict()

    # Find the JS script that holds the ytInitialData object; the video
    # entries live inside it.
    if 'ytInitialData' not in text_response:
        print('Não encontrado objeto ytInitialData no retorno. Verifique')
        return None

    # +16 skips the 13-character name plus three more characters (presumably
    # the " = " that follows it); -1 drops the character right before
    # </script> (presumably the trailing ";").
    index_init = text_response.find("ytInitialData") + 16
    index_final = text_response.find('</script>', index_init) - 1
    script = text_response[index_init:index_final]

    # ytInitialData is a JavaScript object literal that can be parsed as
    # JSON. We then walk its sub-keys down to the `contents` list, which
    # holds the properties of each video, such as the id used to identify it.
    temp = JSloads(script)

    # If the playlist is not public, pytube will not find it.
    if "alerts" in temp:
        erro = temp.get("alerts")[0]

        if "alertRenderer" in erro:
            erro1 = erro.get("alertRenderer").get("text").get("runs")[0].get(
                "text")
            print("Não foi possivel encontrar a playlist informada.")
            print(f"Detalhes: {erro1}")
            print(
                "Certifique-se de que a playlist é pública, e tente novamente."
            )
            return None

        elif "alertWithButtonRenderer" in erro:
            # Non-fatal: some videos are unavailable, keep going with the rest.
            print("Alguns vídeos não estão disponiveis. Verifique")

        else:
            # Unknown alert: dump the parsed object for inspection and stop.
            print(
                "Erros desconhecidos identificados. Verifique em 'script.js'")
            with open('script.js', 'w') as fl:
                fl.write(JSdumps(temp))
            return None

    # The playlist name becomes the name of the folder created on disk, so
    # it is sanitized before being returned.
    playlist_title = _sanitize_name(
        str(
            temp.get("metadata").get("playlistMetadataRenderer").get(
                "title")).strip())

    # The playlist videos live under this path:
    # contents/twoColumnBrowseResultsRenderer/tabs/tabRenderer/content/sectionListRenderer/contents ...
    #   itemSectionRenderer/contents/playlistVideoListRenderer/contents
    temp1 = temp.get('contents').get("twoColumnBrowseResultsRenderer")
    temp2 = temp1.get('tabs')[0].get("tabRenderer")
    temp3 = temp2.get("content").get("sectionListRenderer").get("contents")[0]
    temp4 = temp3.get("itemSectionRenderer").get("contents")[0].get(
        "playlistVideoListRenderer")
    obj_videos = temp4.get("contents")

    # For testing the handling of playlists with more than 100 videos: the
    # last entry is then read as a continuation item and its token is saved.
    if len(obj_videos) > 100:
        ctoken = obj_videos[-1].get(
            "continuationItemRenderer").get("continuationEndpoint").get(
                "continuationCommand").get("token")
        with open('ctoken.txt', 'w') as fl:
            fl.write(ctoken)

    # Store the videoId and the video name needed to perform the download.
    for video_prop in obj_videos:  # video_prop is a dict
        renderer = video_prop.get("playlistVideoRenderer")
        if renderer is None:
            # Not a video entry (e.g. the trailing continuation item read
            # above); dereferencing it used to raise AttributeError.
            continue

        videoId = renderer.get("videoId")
        videoName = str(
            renderer.get("title").get("runs")[0].get("text")).strip()

        # The video name is used to write the file on disk, so sanitize it.
        info_videos[videoId] = _sanitize_name(videoName)

    return [playlist_title, info_videos]