Beispiel #1
0
def main():
    """Push one test document to a Coveo source, wait, then remove it.

    The source id, organization id and API key are placeholder strings that
    have to be replaced with real values before this can run successfully.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'
    # The same URI identifies the document when adding and when removing it.
    document_uri = "https://myreference&id=TESTME"

    # Push client bound to the target source.
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Describe the document: body text, extension, metadata and title.
    doc = Document(document_uri)
    doc.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    doc.FileExtension = ".html"
    doc.AddMetadata("connectortype", "CSV")
    doc.Title = "THIS IS A TEST"

    # One user identity is allowed, nobody is denied, and the
    # allow-anonymous flag is switched on.
    allowed_identities = [
        CoveoPermissions.PermissionIdentity(
            CoveoConstants.Constants.PermissionIdentityType.User,
            "",
            "*****@*****.**"),
    ]
    allow_anonymous = True
    doc.SetAllowedAndDeniedPermissions(allowed_identities, [], allow_anonymous)

    # Add the document, pause for 100 seconds, then remove it again.
    client.AddSingleDocument(doc)
    time.sleep(100)
    client.RemoveSingleDocument(document_uri)
Beispiel #2
0
def main():
    """Push a single document, taking the credentials from the environment.

    ``PUSH_SOURCE_ID``, ``PUSH_ORG_ID`` and ``PUSH_API_KEY`` are read from
    the environment; a variable that is missing or empty falls back to its
    placeholder string.
    """
    env = os.environ
    source_id = env.get('PUSH_SOURCE_ID') or '--Enter your source id--'
    org_id = env.get('PUSH_ORG_ID') or '--Enter your org id--'
    api_key = env.get('PUSH_API_KEY') or '--Enter your API key--'

    # Push client bound to the target source.
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Describe the document: body text, extension, metadata and title.
    doc = Document("https://myreference/doc2")
    doc.SetData("This is document Two")
    doc.FileExtension = ".html"
    doc.AddMetadata("authors", "*****@*****.**")
    doc.Title = "What's up Doc 2?"

    # Send it.
    client.AddSingleDocument(doc)
Beispiel #3
0
def main():
    """Push one test document, then delete documents older than this run.

    An ordering id is requested from the push client before the document is
    built and pushed; the same id is afterwards handed to
    ``DeleteOlderThan``. The source id, organization id and API key are
    placeholder strings that have to be replaced with real values.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'

    # Push client bound to the target source.
    client = CoveoPush.Push(source_id, org_id, api_key)
    # Ordering id taken before anything is pushed.
    first_ordering_id = client.CreateOrderingId()

    # Describe the document: body text, extension, metadata and title.
    doc = Document("https://myreference&id=TESTME")
    doc.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    doc.FileExtension = ".html"
    doc.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud
    # organization, since a list of values is sent for it.
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    doc.Title = "THIS IS A TEST"

    # One user identity is allowed, nobody is denied, and the
    # allow-anonymous flag is switched on.
    allowed_identities = [
        CoveoPermissions.PermissionIdentity(
            CoveoConstants.Constants.PermissionIdentityType.User,
            "",
            "*****@*****.**"),
    ]
    allow_anonymous = True
    doc.SetAllowedAndDeniedPermissions(allowed_identities, [], allow_anonymous)

    # Send the document, then delete everything older than the ordering id
    # obtained at the start.
    client.AddSingleDocument(doc)
    client.DeleteOlderThan(first_ordering_id)
Beispiel #4
0
def scrap():
    """Scrape the pokemondb.net national Pokedex and push every entry to Coveo.

    Reads ``COVEO_SOURCE_ID``, ``COVEO_API_KEY``, ``COVEO_ORG_ID`` and
    ``USER_EMAIL`` from the environment. For each ``infocard`` element on the
    list page, the Pokemon's own page is downloaded, values from its
    ``vitals-table`` and ``type-table`` tables are attached to a ``Document``
    as metadata, and the document is handed to ``push.Add``. All ``Add``
    calls happen between ``push.Start(True, True)`` and
    ``push.End(True, True)``.

    Nothing is caught here: a network error, a missing HTML element or an
    unexpected value format propagates to the caller, and ``push.End`` is
    then not called.
    """
    pokemon_list_page = requests.get('https://pokemondb.net/pokedex/national')
    soup_pokemon_list_page = BeautifulSoup(pokemon_list_page.content,
                                           'html.parser')
    list_main = soup_pokemon_list_page.find(id='main')
    info_cards = list_main.find_all('div', class_='infocard')

    coveo_source_id = os.environ.get("COVEO_SOURCE_ID")
    coveo_api_key = os.environ.get("COVEO_API_KEY")
    coveo_org_id = os.environ.get("COVEO_ORG_ID")

    push = CoveoPush.Push(coveo_source_id, coveo_org_id, coveo_api_key)
    push.Start(True, True)
    push.SetSizeMaxRequest(150 * 1024 * 1024)

    # The same permission identity is attached to every document.
    user_email = os.environ.get("USER_EMAIL")
    my_permissions = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)

    for info_card in info_cards:
        # The name link carries both the display name and the relative URL.
        name_link = info_card.find('a', class_='ent-name')
        pokemon_name = name_link.text
        pokemon_page_url = 'https://pokemondb.net' + name_link['href']

        document = Document(pokemon_page_url)

        # The sprite <span> appears under one of two class strings; try the
        # plain one first and fall back to the "-v18" variant.
        sprite_tag = info_card.find('span', class_='img-fixed img-sprite')
        if sprite_tag is None:
            sprite_tag = info_card.find(
                'span', class_='img-fixed img-sprite img-sprite-v18')
        pokemon_picture_url = sprite_tag['data-src']

        # The first <small> holds the number behind a one-character prefix;
        # the second <small> holds one link per type.
        pokemon_number = info_card.find('small').text[1:]
        pokemon_gen = find_gen(int(pokemon_number))
        pokemon_types_tags = info_card.find_all('small')[1].find_all('a')

        print('scrapping pokemon: ' + pokemon_name + ' | index : ' +
              pokemon_number)

        pokemon_types = [type_tag.text for type_tag in pokemon_types_tags]

        pokemon_page = requests.get(pokemon_page_url)
        soup_pokemon_page = BeautifulSoup(pokemon_page.content, 'html.parser')
        page_main = soup_pokemon_page.find(id='main')
        tables = page_main.find_all('table', class_='vitals-table')

        # Rows 2, 3 and 4 of the first vitals table are read as species,
        # height and weight.
        vitals_rows = tables[0].find_all('tr')
        pokemon_species = vitals_rows[2].find('td').text
        pokemon_height = vitals_rows[3].find('td').text
        pokemon_weight = vitals_rows[4].find('td').text

        # The fourth vitals table is read as "header text -> first cell text".
        base_stats = {}
        for base_stat_tag in tables[3].find_all('tr'):
            base_stats[base_stat_tag.find('th').text] = base_stat_tag.find(
                'td').text

        # Each defense table contributes one metadata entry per column: the
        # link text of the header cell is the key, the cell in the same
        # column of the second row is the value. Rows and cells are looked
        # up once per table instead of once per column.
        defenses_tables = page_main.find_all(
            'table', class_='type-table type-table-pokedex')
        for defense_table in defenses_tables:
            table_rows = defense_table.find_all('tr')
            header_cells = table_rows[0].find_all('th')
            if not header_cells:
                # No columns: leave the second row untouched.
                continue
            value_cells = table_rows[1].find_all('td')
            for column, header_cell in enumerate(header_cells):
                document.AddMetadata(header_cell.find('a').text,
                                     value_cells[column].text)

        document.Title = pokemon_name
        document.SetData(pokemon_page.text)
        document.FileExtension = ".html"
        document.AddMetadata('name', pokemon_name)
        document.AddMetadata('url', pokemon_page_url)
        document.AddMetadata('number', pokemon_number)
        document.AddMetadata('generation', pokemon_gen)
        document.AddMetadata('types', pokemon_types)
        document.AddMetadata('specie', pokemon_species)
        document.AddMetadata('weight', pokemon_weight)
        # Keep the text before the unit, minus the one separator character;
        # str.index raises ValueError when the unit is absent.
        document.AddMetadata('weight_int',
                             pokemon_weight[0:pokemon_weight.index('kg') - 1])
        document.AddMetadata('height', pokemon_height)
        document.AddMetadata('height_int',
                             pokemon_height[0:pokemon_height.index('m') - 1])
        document.AddMetadata('hp', base_stats.get('HP'))
        document.AddMetadata('hp_int', base_stats.get('HP'))
        document.AddMetadata('attack', base_stats.get('Attack'))
        document.AddMetadata('attack_int', base_stats.get('Attack'))
        document.AddMetadata('defense', base_stats.get('Defense'))
        document.AddMetadata('defense_int', base_stats.get('Defense'))
        # NOTE(review): these two keys must match the header text exactly;
        # if the page spells them with a space ("Sp. Atk") the lookups
        # return None - verify against the live page.
        document.AddMetadata('sp_atk', base_stats.get('Sp.Atk'))
        document.AddMetadata('sp_def', base_stats.get('Sp.Def'))
        document.AddMetadata('speed', base_stats.get('Speed'))
        document.AddMetadata('speed_int', base_stats.get('Speed'))
        document.AddMetadata('picture_url', pokemon_picture_url)
        document.SetAllowedAndDeniedPermissions([my_permissions], [], True)

        print('Send: ' + pokemon_name + ' | index : ' + pokemon_number +
              ' to the PUSH API')
        push.Add(document)
        print('Sent: ' + pokemon_name + ' | index : ' + pokemon_number +
              ' to the PUSH API')

    push.End(True, True)
Beispiel #5
0
    # Concatenate the text of every type entry, each followed by ";".
    poke_type = ""
    for pokemon_type in poke_types_entry:
        poke_type += pokemon_type.text + ";"
    # Drop the trailing ";" left behind by the loop.
    poke_type = poke_type[:-1]

    # This makes sure that there are no special characters: if printing the
    # name raises, the last character of the name is dropped.
    # NOTE(review): the bare ``except`` catches every exception, not only
    # encoding errors - confirm that this is intended.
    try:
        print(name)
    except:
        name = name[:-1]

    # Create the document from the link
    mydoc = Document(link)

    # Set plain text: name, types (";" replaced by spaces) and generation
    mydoc.SetData(name + ' ' + poke_type.replace(";", " ") + ' ' + generation)

    # Set FileExtension
    mydoc.FileExtension = ".html"

    # Add Metadata
    mydoc.AddMetadata("connectortype", "HTML")
    mydoc.AddMetadata("pokemon_name", name)
    mydoc.AddMetadata("pokemon_picture", pic_url)
    mydoc.AddMetadata("pokemon_type", poke_type)
    # Only the last character of ``generation`` is stored.
    mydoc.AddMetadata("pokemon_generation", generation[len(generation) - 1:])
    mydoc.AddMetadata("pokemon_number", number)

    # Set the title
    mydoc.Title = name