def add_document(post):
    """Build a push document describing one person record.

    Args:
        post: Mapping with the string fields ``UserName``, ``FirstName``,
            ``MiddleName``, ``LastName``, ``PositionDescription``,
            ``JobFunction`` and ``JobFamily``.

    Returns:
        The populated ``Document`` (HTML quickview, file extension, metadata,
        dates, title and permissions set). Pushing it is left to the caller.

    Raises:
        KeyError: If ``post`` is a dict lacking one of the fields above.
    """
    # NOTE(review): the URI uses '&id=' rather than '?id='; it is kept
    # unchanged because it is the identifier the document is created with.
    mydoc = Document('https://myreference&id=' + post['UserName'])

    # The same "First Last (JobFunction)" text serves as the HTML <title>
    # and as the document title.
    title = (post['FirstName'] + " " + post['LastName'] +
             " (" + post['JobFunction'] + ")")

    # One table row per field. JobFamily is now interpolated like the others;
    # previously the literal text "post['JobFamily']" ended up in the HTML.
    # NOTE(review): values are inserted without HTML escaping.
    fields = ('FirstName', 'MiddleName', 'LastName', 'PositionDescription',
              'JobFunction', 'JobFamily')
    rows = "".join(
        "<tr><td>" + field + "</td><td>" + post[field] + "</td></tr>"
        for field in fields)

    # Build up the quickview/preview (HTML)
    content = (
        "<meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'><html><head><title>"
        + title +
        "</title><style>.datagrid table { border-collapse: collapse; text-align: left; } .datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; background: #fff; overflow: hidden; border: 1px solid #006699; -webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }.datagrid table td, .datagrid table th { padding: 3px 10px; }.datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } .datagrid table thead th:first-child { border: none; }.datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }.datagrid table tbody  tr:nth-child(even)  td { background: #E1EEF4; color: #00496B; }.datagrid table tbody td:first-child { border-left: none; }.datagrid table tbody tr:last-child td { border-bottom: none; }</style></head><body style='Font-family:Arial'><div class='datagrid'><table><tbody>"
        + rows +
        "</tbody></table></div></body></html>")
    mydoc.SetContentAndZLibCompress(content)

    # Set the fileextension
    mydoc.FileExtension = ".html"

    # Set metadata
    mydoc.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Use one timestamp so the date and the modified date agree exactly.
    now = datetime.datetime.now()
    mydoc.SetDate(now)
    mydoc.SetModifiedDate(now)
    mydoc.Title = title

    # Set permissions: one allowed user identity, nobody denied; the final
    # True is passed as the third (allow-anonymous) argument.
    user_email = "*****@*****.**"
    myperm = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
    mydoc.SetAllowedAndDeniedPermissions([myperm], [], True)

    print('\nUser %s for title "%s"' % (user_email, post['FirstName']))
    return mydoc
Exemple #2
0
def create_document(people):
    """Return a push document for ``people`` carrying an empty HTML body.

    The document URI is 'https://myreference?id=' followed by the string
    form of ``people['id']``.
    """
    reference = 'https://myreference?id=' + str(people['id'])
    document = Document(reference)
    # Empty HTML shell used as the quickview/preview
    document.SetContentAndZLibCompress("<HTML><BODY></BODY></HTML>")
    return document
Exemple #3
0
def addRating(rate):
    """Create the push document for a single rating.

    ``rate`` is read by the keys 'id', 'house_id', 'type', 'age' and 'rate';
    each value is copied into a 'my...' metadata field and the document is
    tagged with objecttype 'Rating'. The document is returned, not pushed.
    """
    rating_doc = Document('https://rating?id=' + rate['id'])
    rating_doc.FileExtension = ".html"

    # (metadata field name, key in ``rate``), in the order they are added
    field_map = (
        ('myratingid', 'id'),
        ('myhouseid', 'house_id'),
        ('myratingtype', 'type'),
        ('myratingage', 'age'),
        ('myrating', 'rate'),
    )
    for meta_name, source_key in field_map:
        rating_doc.AddMetadata(meta_name, rate[source_key])
    rating_doc.AddMetadata('objecttype', 'Rating')

    return rating_doc
def main():
    """Push the sample PowerPoint file as a single document.

    Credentials are read from the PUSH_SOURCE_ID, PUSH_ORG_ID and
    PUSH_API_KEY environment variables; a placeholder string is used when a
    variable is unset or empty.
    """
    env = os.environ
    source_id = env.get('PUSH_SOURCE_ID') or '--Enter your source id--'
    org_id = env.get('PUSH_ORG_ID') or '--Enter your org id--'
    api_key = env.get('PUSH_API_KEY') or '--Enter your API key--'

    # Setup the push client
    client = CoveoPush.Push(source_id, org_id, api_key)

    # The document is created from, and filled with, the sample file
    pptx_path = os.path.join('testfiles', 'Example.pptx')
    document = Document('file:///' + pptx_path)
    document.GetFileAndCompress(pptx_path)

    # Metadata and title
    document.AddMetadata("connectortype", "PPTX")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    document.AddMetadata("rssauthors", ["Coveo", "R&D"])
    document.Title = "THIS IS A TEST"

    # Permissions: one allowed user identity, nobody denied; the final True
    # is the allow-anonymous argument
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    document.SetAllowedAndDeniedPermissions([identity], [], True)

    # Push the document
    client.AddSingleDocument(document)
Exemple #5
0
def main():
    """Push the sample PowerPoint file as a single document.

    The source id, organization id and API key are placeholders that must be
    replaced before running.
    """
    # Local import keeps this example self-contained.
    import os

    sourceId = '--Enter your source id--'
    orgId = '--Enter your org id--'
    apiKey = '--Enter your API key--'

    # Setup the push client
    push = CoveoPush.Push(sourceId, orgId, apiKey)

    # Build the path with the platform separator instead of a hard-coded
    # backslash, so the sample file is also found on non-Windows systems.
    # On Windows the resulting string is identical to the previous literal.
    myfile = os.path.join('testfiles', 'Example.pptx')
    # Create a document
    mydoc = Document('file:///' + myfile)
    # Get the file contents and add it to the document
    mydoc.GetFileAndCompress(myfile)
    # Set the metadata
    # NOTE(review): connectortype is "CSV" although the file is a .pptx;
    # left unchanged because it is indexed data - confirm the intent.
    mydoc.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    # Set the title
    mydoc.Title = "THIS IS A TEST"
    # Set permissions
    user_email = "*****@*****.**"
    # Create a permission Identity
    myperm = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
    # Set the permissions on the document
    allowAnonymous = True
    mydoc.SetAllowedAndDeniedPermissions([myperm], [], allowAnonymous)

    # Push the document
    push.AddSingleDocument(mydoc)
Exemple #6
0
def main():
    """Push one plain-text test document, wait, then remove it again.

    The source id, organization id and API key are placeholders that must be
    replaced before running.
    """
    client = CoveoPush.Push(
        '--Enter your source id--',
        '--Enter your org id--',
        '--Enter your API key--',
    )

    # The same URI is used to add the document and, later, to remove it
    doc_uri = "https://myreference&id=TESTME"

    # First add the document: plain text body, extension, metadata, title
    document = Document(doc_uri)
    document.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    document.FileExtension = ".html"
    document.AddMetadata("connectortype", "CSV")
    document.Title = "THIS IS A TEST"

    # Permissions: one allowed user identity, nobody denied; the final True
    # is the allow-anonymous argument
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    document.SetAllowedAndDeniedPermissions([identity], [], True)

    # Push the document
    client.AddSingleDocument(document)

    # Pause for 100 seconds before removing the document
    time.sleep(100)

    # Remove it
    client.RemoveSingleDocument(doc_uri)
def addNeighboorhood(neighboorhood):
    """Create the push document for one neighbourhood.

    ``neighboorhood`` is read by the keys 'id', 'name', 'mydescr',
    'mycountry', 'mycity', 'mylat' and 'mylon'. The name becomes the title,
    the 'my...' values are copied to metadata fields of the same name, and
    the document is tagged with objecttype 'Neighboorhood'. The document is
    returned, not pushed.
    """
    hood_doc = Document('https://neighboorhood?id=' + neighboorhood['id'])
    hood_doc.FileExtension = ".html"
    hood_doc.Title = neighboorhood['name']

    # Metadata field names equal the keys of the input mapping
    for field in ('mydescr', 'mycountry', 'mycity', 'mylat', 'mylon'):
        hood_doc.AddMetadata(field, neighboorhood[field])
    hood_doc.AddMetadata('objecttype', 'Neighboorhood')

    return hood_doc
Exemple #8
0
def addAvailability(avail):
    """Create the push document for one availability record.

    ``avail['id']`` is used both in the document URI and as the
    'myavailstatus' metadata value; ``avail['myhouseids']`` is stored under
    'myhouseids'. The document is returned, not pushed.
    """
    availability_id = avail['id']
    avail_doc = Document('https://availability?id=' + availability_id)
    avail_doc.FileExtension = ".html"

    avail_doc.AddMetadata('myavailstatus', availability_id)
    avail_doc.AddMetadata('myhouseids', avail['myhouseids'])
    avail_doc.AddMetadata('objecttype', 'Availability')
    return avail_doc
Exemple #9
0
def main():
    """Push one plain-text test document, then delete older documents.

    An ordering id is requested before the push and handed to
    ``DeleteOlderThan`` afterwards. The source id, organization id and API
    key are placeholders that must be replaced before running.
    """
    client = CoveoPush.Push(
        '--Enter your source id--',
        '--Enter your org id--',
        '--Enter your API key--',
    )
    # Ordering id taken before anything is pushed
    first_ordering_id = client.CreateOrderingId()

    # Create the document: plain text body, extension, metadata, title
    document = Document("https://myreference&id=TESTME")
    document.SetData("ALL OF THESE WORDS ARE SEARCHABLE")
    document.FileExtension = ".html"
    document.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    document.AddMetadata("rssauthors", ["Coveo", "R&D"])
    document.Title = "THIS IS A TEST"

    # Permissions: one allowed user identity, nobody denied; the final True
    # is the allow-anonymous argument
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    document.SetAllowedAndDeniedPermissions([identity], [], True)

    # Push the document
    client.AddSingleDocument(document)

    # Delete older documents
    client.DeleteOlderThan(first_ordering_id)
Exemple #10
0
def createDoc(myfile, version):
    """Build a push document from the file ``myfile``.

    The document URI is 'file:///' + ``version`` + '/' + ``myfile``; the
    content is loaded from ``myfile`` itself. The document is returned with
    metadata, title and permissions set, not pushed.
    """
    uri = 'file:///' + version + "/" + myfile
    document = Document(uri)
    # Get the file and compress it
    document.GetFileAndCompress(myfile)

    # Metadata and title
    document.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    document.AddMetadata("rssauthors", ["Coveo", "R&D"])
    document.Title = "THIS IS A TEST"

    # Permissions: one allowed user identity, nobody denied; the final True
    # is passed as the third (allow-anonymous) argument
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    document.SetAllowedAndDeniedPermissions([identity], [], True)
    return document
Exemple #11
0
def main():
    """Push the plain-text sample document 'doc2'.

    Credentials are read from the PUSH_SOURCE_ID, PUSH_ORG_ID and
    PUSH_API_KEY environment variables; a placeholder string is used when a
    variable is unset or empty.
    """
    env = os.environ
    source_id = env.get('PUSH_SOURCE_ID') or '--Enter your source id--'
    org_id = env.get('PUSH_ORG_ID') or '--Enter your org id--'
    api_key = env.get('PUSH_API_KEY') or '--Enter your API key--'

    # Setup the push client
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Create a document
    document = Document("https://myreference/doc2")
    document.SetData("This is document Two")
    document.FileExtension = ".html"
    document.AddMetadata("authors", "*****@*****.**")
    document.Title = "What's up Doc 2?"

    # Push the document
    client.AddSingleDocument(document)
Exemple #12
0
def main():
    """Push one HTML test document, then delete older documents.

    The HTML set as content also serves as the quickview. An ordering id is
    requested before the push and handed to ``DeleteOlderThan`` afterwards.
    The source id, organization id and API key are placeholders that must be
    replaced before running.
    """
    client = CoveoPush.Push(
        '--Enter your source id--',
        '--Enter your org id--',
        '--Enter your API key--',
    )
    # Ordering id taken before anything is pushed
    first_ordering_id = client.CreateOrderingId()

    # Create a document
    document = Document('https://myreference&id=TESTME')

    # Set the content. This will also be available as the quickview.
    quickview_html = "<meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'><html><head><title>My First Title</title><style>.datagrid table { border-collapse: collapse; text-align: left; } .datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; background: #fff; overflow: hidden; border: 1px solid #006699; -webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }.datagrid table td, .datagrid table th { padding: 3px 10px; }.datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } .datagrid table thead th:first-child { border: none; }.datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }.datagrid table tbody  tr:nth-child(even)  td { background: #E1EEF4; color: #00496B; }.datagrid table tbody td:first-child { border-left: none; }.datagrid table tbody tr:last-child td { border-bottom: none; }</style></head><body style='Font-family:Arial'><div class='datagrid'><table><tbody><tr><td>FirstName</td><td>Willem</td></tr><tr><td>MiddleName</td><td>Van</td></tr><tr><td>LastName</td><td>Post</td></tr><tr><td>PositionDescription</td><td>VP Engineering</td></tr><tr><td>JobFunction</td><td>CTO</td></tr><tr><td>JobFamily</td><td>Management</td></tr></tbody></table></div></body></html>"
    document.SetContentAndZLibCompress(quickview_html)

    # Metadata and title
    document.AddMetadata("connectortype", "CSV")
    # rssauthors should be set as a multi-value field in your Coveo Cloud organization
    document.AddMetadata("rssauthors", ["Coveo", "R&D"])
    document.Title = "THIS IS A TEST"

    # Permissions: one allowed user identity, nobody denied; the final True
    # is the allow-anonymous argument
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )
    document.SetAllowedAndDeniedPermissions([identity], [], True)

    # Push the document
    client.AddSingleDocument(document)

    # Delete older documents
    client.DeleteOlderThan(first_ordering_id)
Exemple #13
0
    # Make a string with the types separated by semicolumn
    # This is for the multi value facet
    poke_type = ""
    for pokemon_type in poke_types_entry:
        poke_type += pokemon_type.text + ";"
    poke_type = poke_type[:-1]

    # This make sure that there are no special characthers
    try:
        print(name)
    except:
        name = name[:-1]

    # First add the document
    mydoc = Document(link)

    # Set plain text
    mydoc.SetData(name + ' ' + poke_type.replace(";", " ") + ' ' + generation)

    # Set FileExtension
    mydoc.FileExtension = ".html"

    # Add Metadata
    mydoc.AddMetadata("connectortype", "HTML")
    mydoc.AddMetadata("pokemon_name", name)
    mydoc.AddMetadata("pokemon_picture", pic_url)
    mydoc.AddMetadata("pokemon_type", poke_type)
    mydoc.AddMetadata("pokemon_generation", generation[len(generation) - 1:])
    mydoc.AddMetadata("pokemon_number", number)
Exemple #14
0
def add_document(house):
    """Build the push document for one house listing and push its related documents.

    ``house`` is indexed by column name (see the column list below); the
    values are used as strings (concatenated and passed through
    ``str.replace``).

    Side effects:
        * Unless the global ``noratings`` is truthy, randomly generated rating
          documents (built with ``addRating``) are pushed through the global
          ``push`` client.
        * The first time a neighbourhood+city key is seen whose overview is
          longer than 400 characters, it is recorded in the global
          ``neigboorhoods`` dict and a neighbourhood document (built with
          ``addNeighboorhood``) is pushed.

    Returns:
        The ``Document`` for the house, with its HTML quickview set and every
        collected ``meta`` entry whose key does not contain 'sitecore' added
        as metadata. The house document itself is not pushed here.
    """
    global goldMember
    global silverMember
    global platinumMember
    global publicMember
    global neigboorhoods
    global push
    global noratings
    global BASE_URL
    global currentExport
    # Column names of the listing data from which ``house`` is read:
    # id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month

    # ``meta`` collects every metadata value; it is copied onto the document
    # at the very end. ``body`` is assigned but never read in this function.
    meta = dict()
    body = ""
    document_id = ""

    # The listing URL is the document identifier
    document_id = house['listing_url']
    mydoc = Document(document_id)
    # Set the fileextension
    mydoc.FileExtension = ".html"

    #print (house)

    # Metadata image: same picture URL, with the 'large' policy swapped for 'medium'
    imageurl = house['picture_url'].replace('aki_policy=large',
                                            'aki_policy=medium')
    # The string literal below is disabled code (local image download); it is
    # evaluated as a no-op expression.
    """ if not os.path.exists("images\\"+house['id']+".jpg"):
      try:
        print ("Get Image "+str(house['id']))
        urllib.request.urlretrieve(imageurl, "images\\"+house['id']+".jpg")
        time.sleep(0.1)
      except:
        return ""
    imageurl="images\\"+house['id']+".jpg" """
    # Build up the quickview/preview (HTML)
    content = "<html><head><meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>"
    #    content = "<link href=\"https://fonts.googleapis.com/css?family=Montserrat:400,600&display=swap\" rel=\"stylesheet\">"
    # Inline stylesheet for the quickview
    content = content + "<style>"
    content = content + " body > div:nth-child(2) { display: none }"
    content = content + " [id^=CoveoHighlight] {background-color: white !important;}"
    content = content + " .side .title {"
    content = content + "    color: #000 !important; font-size: 16pt !important;padding-bottom: 15px;"
    content = content + "  }"
    content = content + "  .side .host {"
    content = content + "    display:inline-block;"
    content = content + "    color: gray;padding-bottom: 15px;"
    content = content + "  }"
    content = content + "  .side .city, .side .state, .side .country {"
    content = content + "    display: inline-block;"
    content = content + "    padding-left: 10px;"
    content = content + "    color: gray;"
    content = content + "  }"
    content = content + "  .side .header_info {"
    content = content + "    font-size: 12pt;"
    #content = content + "    font-weight: bold;"
    content = content + "  }"
    content = content + "  .side .info {"
    content = content + "    font-size: 10pt;padding-bottom: 15px;"
    content = content + "  }"
    content = content + "  .side .infos {"
    content = content + "    color: gray;padding-bottom: 15px;display:inline-block;padding-right: 10px;"
    content = content + "  }"
    content = content + "  .myimage img, .image img{"
    content = content + "    box-shadow: 1px 3px 5px 0px gray;"
    content = content + "   width: 100%;"
    content = content + "  }"
    content = content + "    .myimage img {"
    content = content + "      border-radius: 2px;"
    content = content + "     max-height:250px;"
    content = content + "  }"
    content = content + "  .image img {"
    content = content + "    border-radius: 4px;"
    content = content + "    max-height:350px;max-width: 350px;"
    content = content + "}"
    content = content + "  .image {"
    content = content + "   float: left;"
    content = content + "   padding: 5px;"
    content = content + "   padding-right: 15px;"
    content = content + "  }"
    content = content + " .amenities { font-size: 11pt; column-count: 3;max-width: 50%;}"
    content = content + " .inf_title { display:inline-block;padding-right: 5px; font-size: 0.8em;  font-style: italic;}"
    content = content + " .inf_value { display:inline-block;}"
    content = content + " body {font-family: 'Verdana', sans-serif !important;}"
    content = content + " ul {list-style: none;font-size:11pt}"
    content = content + " li {padding-right: 5px}"
    content = content + " .host {padding-right: 5px}"
    content = content + " .city {padding-right: 5px}"
    content = content + " .state {padding-right: 5px}"
    content = content + " .country {padding-right: 5px}"
    #content = content + " .host::before {content: 'By'; padding-right: 5px; font-size: 0.8em;  font-style: italic;}"
    #content = content + " .city::before {content: 'In'; padding-right: 5px; font-size: 0.8em;  font-style: italic;}"
    #content = content + " .state::before {content: 'State'; padding-right: 5px; font-size: 0.8em;  font-style: italic;}"
    #content = content + " .country::before {content: 'Country'; padding-right: 5px; font-size: 0.8em;  font-style: italic;}"
    content = content + "</style>"
    content = content + "</head>"
    # The below is NOT allowed
    # content = content + "<script>"
    # content = content + "  function removeHigh() {"
    # content = content + "    console.log('removeHigh called');"
    # content = content + "    var high=document.querySelectorAll('[id^=\"CoveoHighlight\"]');"
    # content = content + "    high.forEach(k => { k.style.backgroundColor =\"white\"; } );"
    # content = content + " }"
    # content = content + "document.addEventListener('DOMContentLoaded', function () {"
    # content = content + " removeHigh();"
    # content = content + " document.querySelector('body > div:nth-child(2)').style.display=\"none\";"
    # content = content + "});"
    # content = content + "</script>"
    # content = content+ "<title>"+movie['title']+"    ("+movie["release_date"].split('-')[0]+")</title>"
    # NOTE(review): the <title> is appended after </head> has already been
    # written, so it ends up outside the head element.
    content = content + "<title>" + house['name'] + "</title>"
    content = content + "<body>"

    content = content + "<div class='header'>"
    content = content + "<div class='imageblock'><div class='image'>"
    # The image tag is only emitted when a picture URL is present
    if (house['picture_url']):
        content = content + " <img class='imageimg' src='" + house[
            'picture_url'] + "' onerror=\"javascript:this.src='images/emptyHouse.jpg'\">"

    content = content + "</div><div class='side' style='padding-left: 370px;'><div class='title'>" + house[
        "name"] + "</div>"
    #content = content + "<ul>"
    #content = content + "<li>By<span class='host'>"+house["host_name"]+"<span></li>"
    #content = content + "<li>In<span class='city'>"+house["city"]+"<span></li>"
    #content = content + "<li>State<span class='state'>"+house["state"]+"<span></li>"
    #content = content + "<li>Country<span class='country'>"+house["country"]+"<span></li>"
    #content = content + "</ul>"
    content = content + "<div class='header_info'>Overview</div>"
    #content = content + "<div class='infos'><div class='inf_title'>Property type</div><div class='inf_value'>"+house["property_type"]+"</div></div>"
    #content = content + "<div class='infos'><div class='inf_title'>Room type</div><div class='inf_value'>"+house["room_type"]+"</div></div>"
    #content = content + "<div class='infos'><div class='inf_title'>Bed type</div><div class='inf_value'>"+house["bed_type"]+"</div></div>"

    #content = content + "<div class='header_info'>Summary</div>"
    # The summary is always written; the sections after it only when non-empty
    content = content + "<div class='info'>" + house["summary"] + "</div>"
    if house["description"]:
        content = content + "<div class='header_info'>Full Description</div>"
        content = content + "<div class='info'>" + house[
            "description"] + "</div>"
    if house["space"]:
        content = content + "<div class='header_info'>Space</div>"
        content = content + "<div class='info'>" + house["space"] + "</div>"
    if house["neighborhood_overview"]:
        content = content + "<div class='header_info'>Neighborhood</div>"
        content = content + "<div class='info'>" + house[
            "neighborhood_overview"] + "</div>"
    #content = content + "<div class='header_info'>Amenities</div>"
    # Strip the quotes and braces from the amenities value and split it on
    # commas; the list is joined with ';' for the 'myamenities' field below
    amenities = house['amenities'].replace('"', '').replace('{', '').replace(
        '}', '').split(',')
    #content = content + "<div class='amenities'>"+  '<br>'.join(amenities)+"</div>"
    content = content + "</body></html>"
    #put content also in fields for Sitecore dumps
    #meta["sitecorePage"] = content
    # Keys containing 'sitecore' are kept in ``meta`` only; the final loop
    # does not add them to the document
    meta["sitecoreDescription"] = house["description"]
    meta["sitecoreSpace"] = house["space"]
    meta["sitecoreNeighbourhood"] = house["neighborhood_overview"]
    # Geocode

    body = ""
    mydoc.SetContentAndZLibCompress(content)
    meta["connectortype"] = "Push"
    meta["mytype"] = "Houses"
    meta["myhouseid"] = house['id']
    meta["myimage"] = imageurl
    meta["mycountry"] = house['country']
    meta["mycity"] = house['city']
    # Price with the '$' removed; thousands separators are also removed
    # before the numeric conversion
    meta["myprice"] = house["price"].replace('$', '')  #new
    price = float(meta["myprice"].replace(",", ''))
    # Membership tiers by price: <=130, (130, 180), [180, 250), >=250
    if (price <= 130):
        members = "Public;Gold;Silver;Platinum"
    if (price > 130 and price < 180):
        members = "Gold;Silver;Platinum"
    if (price >= 180 and price < 250):
        members = "Silver;Platinum"
    if (price >= 250):
        members = "Platinum"
    if not noratings:
        # Add randomly generated ratings. ``addme`` is drawn once and decides
        # whether the optional ratings (A, E, G) are added; every rating value
        # is a fresh randint(1, 5).
        addme = randint(1, 5)
        rating = randint(1, 5)
        if (addme > 2):
            myrate = addRating({
                'id': house['id'] + 'A',
                'house_id': house['id'],
                'type': 'Business',
                'age': '25-40',
                'rate': rating
            })
            push.Add(myrate)
        # Rating B is always added
        rating = randint(1, 5)
        myrate = addRating({
            'id': house['id'] + 'B',
            'house_id': house['id'],
            'type': 'Business',
            'age': '40-50',
            'rate': rating
        })
        push.Add(myrate)
        # Houses priced above 180 get ratings C and D (and E when addme > 2)
        if (price > 180):
            rating = randint(1, 5)
            myrate = addRating({
                'id': house['id'] + 'C',
                'house_id': house['id'],
                'type': 'Business',
                'age': '50+',
                'rate': rating
            })
            push.Add(myrate)
            rating = randint(1, 5)
            myrate = addRating({
                'id': house['id'] + 'D',
                'house_id': house['id'],
                'type': 'Family',
                'age': '25-35',
                'rate': rating
            })
            push.Add(myrate)
            if (addme > 2):
                rating = randint(1, 5)
                myrate = addRating({
                    'id': house['id'] + 'E',
                    'house_id': house['id'],
                    'type': 'Family',
                    'age': '35-50',
                    'rate': rating
                })
                push.Add(myrate)
        # Houses priced at most 180 get ratings F and I (and G when addme > 2)
        if (price <= 180):
            rating = randint(1, 5)
            myrate = addRating({
                'id': house['id'] + 'F',
                'house_id': house['id'],
                'type': 'Individual',
                'age': '20-30',
                'rate': rating
            })
            push.Add(myrate)
            if (addme > 2):
                rating = randint(1, 5)
                myrate = addRating({
                    'id': house['id'] + 'G',
                    'house_id': house['id'],
                    'type': 'Individual',
                    'age': '30-40',
                    'rate': rating
                })
                push.Add(myrate)
            rating = randint(1, 5)
            myrate = addRating({
                'id': house['id'] + 'I',
                'house_id': house['id'],
                'type': 'Individual',
                'age': '40-50',
                'rate': rating
            })
            push.Add(myrate)

    meta["mymemberships"] = members
    meta["myproptype"] = house['property_type']  #new
    meta["myroomtype"] = house['room_type']  #new
    meta["mynopersons"] = house['accommodates']  #new
    meta["mybathrooms"] = house['bathrooms']  #new
    meta["mybedrooms"] = house['bedrooms']  #new
    meta["mybeds"] = house['beds']  #new
    # The '...f' fields receive the same values as the three fields above
    meta["mybathroomsf"] = house['bathrooms']  #new
    meta["mybedroomsf"] = house['bedrooms']  #new
    meta["mybedsf"] = house['beds']  #new
    meta["mybedtype"] = house['bed_type']  #new
    meta["myneighbourhood"] = house['neighbourhood_cleansed']  #new
    # check if we already have the neighboorhood (key = neighbourhood + city);
    # it is only recorded and pushed when the overview exceeds 400 characters
    if (house['neighbourhood_cleansed'] + house['city'] not in neigboorhoods):
        if (len(house['neighborhood_overview']) > 400):
            neigboorhoods[house['neighbourhood_cleansed'] +
                          house['city']] = "WeHaveIt"
            print("Adding neighboorhood: " + house['neighbourhood_cleansed'] +
                  house['city'])
            #Add to index
            myneigh = addNeighboorhood({
                'id':
                house['neighbourhood_cleansed'] + house['city'],
                'mydescr':
                house['neighborhood_overview'],
                'name':
                house['neighbourhood_cleansed'],
                'mycity':
                house['city'],
                'mycountry':
                house['country'],
                'mylat':
                house['latitude'],
                'mylon':
                house['longitude']
            })
            push.Add(myneigh)
    meta["myamenities"] = ';'.join(amenities)  #new
    meta["myvotecount"] = house['review_scores_rating']
    meta["myhost"] = house['host_name']  #new
    meta["myhostid"] = house['host_id']  #new
    meta["objecttype"] = "House"  #new
    meta["language"] = "English"
    #meta["sitecoreurl"] = BASE_URL+house['country']+"/"+house['city']+"/"+house['id']
    # Lower-cased URL made of BASE_URL, the country and the house id; it is
    # used as the clickable URI below
    meta["sitecoreurl"] = (BASE_URL + house['country'] + "/" +
                           house['id']).lower()
    # Two ';'-separated values: the property type alone and "property|room"
    meta["myroomprop"] = house['property_type'] + ";" + house[
        'property_type'] + "|" + house['room_type']

    meta["title"] = house["name"]
    # meta["topparentid"]= movie['id']
    mydoc.ClickableUri = meta["sitecoreurl"]  #house['listing_url']
    mydoc.Date = house['last_scraped']

    meta["mylon"] = house['longitude']
    meta["mylat"] = house['latitude']
    #Dump meta inside new JSON for import in Sitecore

    # Copy every collected value onto the document, skipping 'sitecore' keys
    for key in meta:
        if ('sitecore' not in key):
            mydoc.AddMetadata(key, meta[key])
    return mydoc
Exemple #15
0
def scrap():
    """Scrape pokemondb.net's national Pokedex and push each entry to Coveo.

    The listing page is fetched once; every ``infocard`` on it yields one
    ``Document`` whose body is the raw HTML of the Pokemon's detail page and
    whose metadata is read from the card and from the detail page tables.

    Configuration is read from the environment variables
    ``COVEO_SOURCE_ID``, ``COVEO_API_KEY``, ``COVEO_ORG_ID`` and
    ``USER_EMAIL``. Every document lists the ``USER_EMAIL`` identity as its
    only allowed permission.
    """
    listing_response = requests.get('https://pokemondb.net/pokedex/national')
    listing_soup = BeautifulSoup(listing_response.content, 'html.parser')
    info_cards = listing_soup.find(id='main').find_all('div',
                                                       class_='infocard')

    coveo_source_id = os.environ.get("COVEO_SOURCE_ID")
    coveo_api_key = os.environ.get("COVEO_API_KEY")
    coveo_org_id = os.environ.get("COVEO_ORG_ID")

    push = CoveoPush.Push(coveo_source_id, coveo_org_id, coveo_api_key)
    push.Start(True, True)
    # Allow batches of up to 150 MiB per request.
    push.SetSizeMaxRequest(150 * 1024 * 1024)

    user_email = os.environ.get("USER_EMAIL")
    my_permissions = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)

    for info_card in info_cards:
        # --- data available on the listing card -------------------------
        name_link = info_card.find('a', class_='ent-name')
        pokemon_name = name_link.text
        pokemon_page_url = 'https://pokemondb.net' + name_link['href']

        document = Document(pokemon_page_url)

        # The sprite span carries one of two class combinations.
        sprite = info_card.find('span', class_='img-fixed img-sprite')
        if sprite is None:
            sprite = info_card.find(
                'span', class_='img-fixed img-sprite img-sprite-v18')
        pokemon_picture_url = sprite['data-src']

        # The first <small> holds the number behind a one-character prefix.
        pokemon_number = info_card.find('small').text[1:]
        pokemon_gen = find_gen(int(pokemon_number))
        type_links = info_card.find_all('small')[1].find_all('a')

        progress_label = pokemon_name + ' | index : ' + pokemon_number
        print('scrapping pokemon: ' + progress_label)

        pokemon_types = [type_link.text for type_link in type_links]

        # --- data available on the detail page --------------------------
        pokemon_page = requests.get(pokemon_page_url)
        detail_main = BeautifulSoup(pokemon_page.content,
                                    'html.parser').find(id='main')
        vitals_tables = detail_main.find_all('table', class_='vitals-table')

        vitals_rows = vitals_tables[0].find_all('tr')
        pokemon_species = vitals_rows[2].find('td').text
        pokemon_height = vitals_rows[3].find('td').text
        pokemon_weight = vitals_rows[4].find('td').text

        # Fourth vitals table: one row per base stat (<th> label, <td> value).
        base_stats = {}
        for stat_row in vitals_tables[3].find_all('tr'):
            stat_value = stat_row.find('td').text
            base_stats[stat_row.find('th').text] = stat_value

        # Type-defense tables: header row of type links, second row of
        # values. Each pair also becomes a metadata field on the document.
        defense = {}  # filled for parity; not read again in this function
        for defense_table in detail_main.find_all(
                'table', class_='type-table type-table-pokedex'):
            table_rows = defense_table.find_all('tr')
            for column, header_cell in enumerate(table_rows[0].find_all('th')):
                multiplier = table_rows[1].find_all('td')[column].text
                type_name = header_cell.find('a').text
                defense[type_name] = multiplier
                document.AddMetadata(type_name, multiplier)

        # --- assemble the document --------------------------------------
        document.Title = pokemon_name
        document.SetData(pokemon_page.text)
        document.FileExtension = ".html"
        document.AddMetadata('name', pokemon_name)
        document.AddMetadata('url', pokemon_page_url)
        document.AddMetadata('number', pokemon_number)
        document.AddMetadata('generation', pokemon_gen)
        document.AddMetadata('types', pokemon_types)
        document.AddMetadata('specie', pokemon_species)
        document.AddMetadata('weight', pokemon_weight)
        # Text up to one character before the unit, e.g. "6.9 kg" -> "6.9".
        document.AddMetadata('weight_int',
                             pokemon_weight[:pokemon_weight.index('kg') - 1])
        document.AddMetadata('height', pokemon_height)
        document.AddMetadata('height_int',
                             pokemon_height[:pokemon_height.index('m') - 1])
        # (metadata field, label looked up in base_stats), in push order.
        for field_name, stat_label in (
                ('hp', 'HP'),
                ('hp_int', 'HP'),
                ('attack', 'Attack'),
                ('attack_int', 'Attack'),
                ('defense', 'Defense'),
                ('defense_int', 'Defense'),
                ('sp_atk', 'Sp.Atk'),
                ('sp_def', 'Sp.Def'),
                ('speed', 'Speed'),
                ('speed_int', 'Speed'),
        ):
            document.AddMetadata(field_name, base_stats.get(stat_label))
        document.AddMetadata('picture_url', pokemon_picture_url)
        document.SetAllowedAndDeniedPermissions([my_permissions], [], True)

        print('Send: ' + progress_label + ' to the PUSH API')
        push.Add(document)
        print('Sent: ' + progress_label + ' to the PUSH API')

    push.End(True, True)
Exemple #16
0
def _credit_item_html(name, role, profile_path):
    """Return the ``<li>`` quickview markup for one cast or crew member."""
    if profile_path:
        portrait = "<img class='castimg' src='https://image.tmdb.org/t/p/w66_and_h66_bestv2" + \
            profile_path + "'>"
    else:
        # Placeholder box when no profile picture is available.
        portrait = "<div class='noimage'></div>"
    return "<li class='cast'>" + portrait + "<div class='info'><b>" + \
        name + "</b><br>" + role + "<br></div></li>"


def add_document(movie, mcountry, mcity, mregion, mlat, mlon, counter):
    """Build the push ``Document`` for one movie as shown in one city.

    The document gets an HTML quickview (poster, overview, cast, crew,
    financials, optional soundtrack and sentiment sections) and a set of
    metadata fields copied or derived from ``movie``.

    Args:
        movie: Movie record as a dict. Keys read unconditionally: ``id``,
            ``title``, ``original_title``, ``overview``, ``tagline``,
            ``status``, ``release_date``, ``budget``, ``revenue``,
            ``popularity``, ``vote_count``, ``vote_average``,
            ``poster_path``, ``backdrop_path``, ``allreviews``, ``genres``,
            ``spoken_languages``, ``keywords['keywords']``,
            ``credits['cast']`` and ``credits['crew']``. Optional keys:
            ``relatedartist``/``relatedsongs``, ``imdb_id`` and the
            ``mysentiment*`` group. NOTE: ``movie['popularity']`` is
            rewritten to ``0`` in place when it equals ``1e-06``.
        mcountry: Country stored as ``mycountry``.
        mcity: City stored as ``mycity`` and appended to the title.
        mregion: Accepted for interface compatibility; not used here.
        mlat: Latitude; the value ``-999`` means "do not store coordinates".
        mlon: Longitude, stored together with ``mlat``.
        counter: Suffix appended to the document id so the same movie can be
            indexed more than once.

    Returns:
        The populated ``Document``.
    """
    meta = dict()

    document_id = 'https://www.themoviedb.org/movie/' + \
        str(movie['id'])+'/'+str(counter)
    mydoc = Document(document_id)
    # Set the fileextension
    mydoc.FileExtension = ".html"

    # keywords: each name is followed by " - " (the trailing space is trimmed
    # when the text is rendered below)
    keywords = "".join(keyword['name']+" - "
                       for keyword in movie['keywords']['keywords'])
    # genre
    genres = "".join(genre['name'].title()+";" for genre in movie['genres'])

    # cast
    allpeople = ""
    cast_roles = []
    cast_items = []
    for cast in movie['credits']['cast']:
        character = cast['character'].lower().replace(' and ', ' & ').title()
        cast_roles.append(character+";")
        # Substring test on purpose: it mirrors the existing de-duplication
        # of the ';'-separated people string.
        if cast['name'] not in allpeople:
            allpeople = allpeople+cast['name']+';'
        cast_items.append(
            _credit_item_html(cast['name'], character, cast['profile_path']))
    casts = "".join(cast_roles)
    castsfull = "".join(cast_items)
    if castsfull:
        castsfull = "<ol class='castlist'>"+castsfull+"</ol>"

    # crews
    crew_roles = []
    crew_items = []
    for crew in movie['credits']['crew']:
        if crew['name'] not in allpeople:
            allpeople = allpeople+crew['name']+';'
        crew_roles.append(crew['name']+" as "+crew['job']+";")
        crew_items.append(
            _credit_item_html(crew['name'], crew['job'], crew['profile_path']))
    crews = "".join(crew_roles)
    crewsfull = "".join(crew_items)
    if crewsfull:
        crewsfull = "<ol class='castlist'>"+crewsfull+"</ol>"

    # spoken
    spoken = "".join(spoke['name']+";" for spoke in movie['spoken_languages'])

    relatedartist = ""
    relatedsongs = ""
    # 1e-06 is treated as "no popularity" and normalised to 0 (in place).
    if movie['popularity'] == 1e-06:
        movie['popularity'] = 0

    # Build up the quickview/preview (HTML)
    page = [
        "<html><head><meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>",
        "</head>",
        "<title>"+movie['title']+" ("+mcity+")</title>",
        "<body>",
        "<style>body {    -ms-overflow-style: -ms-autohiding-scrollbar;    background-color: #f4f4f4;    color: #000;    font-family: 'Source Sans Pro', Arial, sans-serif;    font-size: 1em;    -webkit-font-smoothing: antialiased;    -moz-osx-font-smoothing: grayscale;}",
        " .header { width: 100%;  position: relative;  z-index: 1;box-sizing:box}",
        " .imageblock { display: inline-flex; background-image: radial-gradient(circle at 20% 50%, rgba(11.76%, 15.29%, 17.25%, 0.98) 0%, rgba(11.76%, 15.29%, 17.25%, 0.88) 100%);}",
    ]
    if movie['backdrop_path']:
        page.append(" .header:before {        content: '';        position: absolute;        left: 0;        right: 0;     height:100%;   width: 100%;        z-index: -1;        display: block;        filter: opacity(100) grayscale(100%) contrast(130%);        background-size: cover;        background-repeat: no-repeat;        background-position: 50% 50%;        background-image: url('https://image.tmdb.org/t/p/w1400_and_h450_bestv2"+movie[
            'backdrop_path']+"');        will-change: opacity;        transition: filter 1s;      }")
        meta["mybackdrop"] = movie['backdrop_path']
    page.extend([
        " .image { padding-left:20px;padding-bottom:20px;padding-top:40px;display: block;  width: 300px; height: 450px; position: relative;   z-index: 2;}",
        " .imageimg {-webkit-box-shadow: 0px 0px 5px 2px rgba(255,255,255,1);-moz-box-shadow: 0px 0px 5px 2px rgba(255,255,255,1);box-shadow: 0px 0px 5px 2px rgba(255,255,255,1); display: block;    width: 300px;    height: 450px;       border-radius: 4px;}",
        " .side {padding-top:40px;padding-bottom:40px;margin-left: 15px; color: #ffffff; }",
        " .noimage {width: 66px;    height: 66px;    line-height: 66px;    font-size: 33px;    display: inline-block;    font-family: 'Arial';    text-align: center;    background-color: #dbdbdb;    color: #b5b5b5;    box-sizing: border-box;    font-size: 1em;    border-radius: 4px;    border: 1px solid #d7d7d7;}",
        " .noimage:before { content: \"X\";}",
        " .castlist {    list-style-type: none;    list-style-position: outside;      margin: 0;    display: flex;    flex-wrap: wrap;    justify-content: flex-start;}",
        " .castimg {  box-sizing: border-box; line-height: 66px;    font-size: 33px; display:inline-block; width: 66px;    height: 66px;    border-radius: 4px;-webkit-box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);-moz-box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);}",
        " .cast {  width: 25%;padding-bottom:10px; }",
        " div.info {     display: block;  width:60%;  align-items: center;  padding-top:5px;  padding-left: 14px;    padding-right: 20px;    }",
        " div.title h2 {margin: 0;   padding: 0;    font-size: 2.4em;    line-height: 1.1em;    font-weight: 700;    display: inline-block;}",
        " .year {  padding-left:10px; opacity: 0.6;  font-size: 1.7em;   font-weight: 400;}",
        " div.title { width: 100%; margin-bottom: 30px;}",
        " div.header_info {  width: 100%;}",
        " h3 {  font-weight: 600; line-height: 1.3em; font-size: 1.3em;  margin-bottom: 8px;}",
        " p.over { font-size: 1em;  line-height: 1.4em;-webkit-margin-before: 0.2em !important;}",
        " </style>",
        "<div class='header'>",
        "<div class='imageblock'><div class='image'>",
    ])
    if movie['poster_path']:
        page.append(" <img class='imageimg' src='https://image.tmdb.org/t/p/w300_and_h450_bestv2" +
                    movie['poster_path']+"'>")
    page.append("</div><div class='side'><div class='title'><h2 style='display:inline-block'>" +
                movie["title"]+"</h2><span class='year'>(" +
                movie["release_date"].split('-')[0]+")</span></div>")
    page.append("<div class='header_info'><h3>Overview</h3></div>")
    page.append("<div><p class='over'>" + movie["overview"]+"</p></div>")
    page.append("<div class='header_info'><h3>Other titles</h3></div>")
    page.append("<div><p class='over'>" + movie["original_title"]+"</p></div>")
    if movie["tagline"]:
        page.append("<div class='header_info'><h3>Tagline</h3></div>")
        page.append("<div><p class='over'>" + movie["tagline"]+"</p></div>")
    page.append("</div></div></div>")  # Sidepanel#Imageblock
    page.append("<div class='header_info'><h3>Cast</h3></div>")
    page.append("<div>"+castsfull+"</div>")
    page.append(" <div class='header_info'><h3>Featured Crew</h3></div>")
    page.append("<div>"+crewsfull+"</div>")
    if 'relatedartist' in movie and movie["relatedartist"]:
        relatedartist = movie["relatedartist"]
        relatedsongs = movie["relatedsongs"]
        page.append("<div class='header_info'><h3>Soundtrack</h3></div>")
        page.append("<div><p class='over'>Artists: " +
                    html.unescape(movie["relatedartist"]).replace(";", " - "))
        page.append("<br>Songs: " +
                    html.unescape(movie["relatedsongs"]).replace(";", " - ") +
                    "</p></div>")

    page.append("<div class='header_info'><h3>Other info</h3></div>")
    page.append("<div><p class='over'>Status: "+movie["status"])
    page.append("<br>Release date: "+movie["release_date"])
    page.append("<br>Budget: "+'${:0,.2f}'.format(movie["budget"]))
    page.append("<br>Revenue: " + '${:0,.2f}'.format(movie["revenue"]))
    page.append("<br>Profit: " +
                '${:0,.2f}'.format(movie["revenue"]-movie["budget"]))
    page.append("<br>Popularity: "+str(int(movie["popularity"])))
    page.append("<br>Spoken languages: " + spoken[:-1].replace(";", ' - '))
    page.append("<br>Genres: "+genres[:-1].replace(";", ' - '))
    page.append("<br>Keywords: " +
                html.unescape(keywords[:-1]).replace(";", " - ")+"</p></div>")

    if 'mysentimentvalue' in movie:
        page.append(
            "<div class='header_info'><h3>Sentiment (by MeaningCloud) on Reviews</h3></div>")
        page.append("<div><p class='over'>Sentiment: " +
                    movie["mysentimentvalue"])
        page.append("<br>Agreement: "+movie["mysentimentagree"])
        page.append("<br>Subjectivity: "+movie["mysentimentsubj"])
        page.append("<br>Irony: " + movie["mysentimentirony"]+"</p></div>")

    page.append("</body></html>")
    content = "".join(page)

    # For reviews: True when there is review text, otherwise an empty string.
    containsattachment = True if movie["allreviews"] else ""

    mydoc.SetContentAndZLibCompress(content)
    meta["connectortype"] = "Push"
    meta["mytype"] = "Movie"
    meta["myimage"] = movie["poster_path"]
    meta["mycountry"] = mcountry
    meta["mycity"] = mcity
    meta["myrevenue"] = movie["revenue"]
    meta["containsattachment"] = containsattachment
    meta["mygenre"] = html.unescape(genres)
    meta["myvotecount"] = movie["vote_count"]
    meta["language"] = "English"
    meta["mystatus"] = movie["status"]
    meta["myrelatedartist"] = html.unescape(relatedartist)
    meta["myrelatedsongs"] = html.unescape(relatedsongs)
    meta["myspokenlang"] = html.unescape(spoken)
    meta["mypeople"] = html.unescape(allpeople)
    meta["mycast"] = html.unescape(casts)
    meta["mycrews"] = crews
    if "imdb_id" in movie:
        meta["myimdb"] = movie["imdb_id"]
    meta["myreviews"] = movie["allreviews"]
    meta["mypopularity"] = int(movie["popularity"])
    meta["myvoteaverage"] = movie["vote_average"]
    meta["mybudget"] = movie["budget"]
    myprofit = movie["revenue"]-movie["budget"]
    # Profit in millions, capped at 1000 so one blockbuster cannot dominate
    # ranking ("this could mess up the ranking big time").
    myprofitvalue = myprofit/1000000
    if myprofitvalue > 1000:
        myprofitvalue = 1000
    meta["myprofit"] = myprofit
    meta["myprofitvalue"] = myprofitvalue
    meta["title"] = movie["title"]+' ('+mcity+')'
    mydoc.ClickableUri = 'https://www.themoviedb.org/movie/' + str(movie['id'])
    meta["myid"] = movie['id']
    meta["myvid"] = str(movie['id'])
    meta["myids"] = str(movie['id'])
    mydoc.Date = movie['release_date']

    # sentiment
    if 'mysentimentvalue' in movie:
        meta["mysentimentvalue"] = movie["mysentimentvalue"]
        meta["mysentimentagree"] = movie["mysentimentagree"]
        meta["mysentimentsubj"] = movie["mysentimentsubj"]
        meta["mysentimentirony"] = movie["mysentimentirony"]
        # label -> (base score, step used by the adjustments below);
        # unknown labels score (0, 0).
        sentiment_scale = {
            "Strong Positive": (2, -1),
            "Positive": (1, -1),
            "Neutral": (0, 0),
            "Negative": (-1, 1),
            "Strong Negative": (-2, 1),
        }
        sentval, addval = sentiment_scale.get(movie['mysentimentvalue'],
                                              (0, 0))
        agreement = movie['mysentimentagree']
        if agreement == "Disagreement":
            sentval = sentval+addval
        elif agreement == "Agreement":
            sentval = sentval-addval
        subjectivity = movie['mysentimentsubj']
        if subjectivity == "Subjective":
            sentval = sentval+addval
        elif subjectivity == "Objective":
            sentval = sentval-addval
        irony = movie['mysentimentirony']
        if irony == "Ironic":
            sentval = sentval-addval
        elif irony == "Non-Ironic":
            sentval = sentval+addval
        meta["mysentimentnumber"] = sentval

    # Compare the latitude *parameter*; -999 means no coordinates to store.
    if mlat != -999:
        meta["mylon"] = mlon
        meta["mylat"] = mlat
    for key, value in meta.items():
        mydoc.AddMetadata(key, value)
    return mydoc
def add_document(house):
    """Build the push ``Document`` for one house listing.

    Side effects on module globals:

    * the listing id is appended (``";" + id``) to the ``publicMember``,
      ``silverMember``, ``goldMember`` and ``platinumMember`` strings
      according to the listing price;
    * the first listing seen for a neighbourhood+city whose overview is
      longer than 400 characters registers it in ``neigboorhoods`` and
      pushes a neighbourhood document through ``push``.

    Args:
        house: Listing row as a dict of strings (``listing_url``, ``name``,
            ``price``, ``amenities``, ``neighbourhood_cleansed``, ...).

    Returns:
        The populated ``Document``; its id and clickable URI are the
        listing URL.
    """
    global goldMember, silverMember, platinumMember, publicMember
    global neigboorhoods, push

    meta = dict()

    # The listing URL doubles as the unique document id.
    mydoc = Document(house['listing_url'])
    # Set the fileextension
    mydoc.FileExtension = ".html"

    imageurl = house['picture_url'].replace('aki_policy=large',
                                            'aki_policy=medium')
    # (A disabled step used to download the picture into a local "images"
    # folder and point imageurl at that copy; the remote URL is used as is.)

    # Build up the quickview/preview (HTML)
    page = [
        "<html><head><meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>",
        "</head>",
        "<title>" + house['name'] + "</title>",
        "<body>",
        "<div class='header'>",
        "<div class='imageblock'><div class='image'>",
    ]
    if house['picture_url']:
        page.append(
            " <img class='imageimg' src='" + house['picture_url'] +
            "' onerror=\"javascript:this.src='images/emptyHouse.jpg'\">")
    page.append("</div><div class='side'><div class='title'>" +
                house["name"] + "</div>")
    page.append("<div class='host'>" + house["host_name"] + "</div>")
    page.append("<div class='city'>" + house["city"] + "</div>")
    page.append("<div class='state'>" + house["state"] + "</div>")
    page.append("<div class='country'>" + house["country"] + "</div>")
    page.append("<div class='header_info'>Summary</div>")
    page.append("<div class='info'>" + house["summary"] + "</div>")
    page.append("<div class='header_info'>Full Description</div>")
    page.append("<div class='info'>" + house["description"] + "</div>")
    page.append("<div class='header_info'>Space</div>")
    page.append("<div class='info'>" + house["space"] + "</div>")
    page.append("<div class='header_info'>Neighborhood</div>")
    page.append("<div class='info'>" + house["neighborhood_overview"] +
                "</div>")
    page.append("<div class='header_info'>Amenities</div>")
    # Strip the quote and brace characters, then split the comma list.
    amenities = house['amenities'].translate(
        str.maketrans('', '', '"{}')).split(',')
    page.append("<div class='amenities'>" + '<br>'.join(amenities) +
                "</div>")
    page.append("</body></html>")
    content = "".join(page)

    mydoc.SetContentAndZLibCompress(content)
    meta.update({
        "connectortype": "Push",
        "mytype": "Houses",
        "myhouseid": house['id'],
        "myimage": imageurl,
        "mycountry": house['country'],
        "mycity": house['city'],
        "myprice": house["price"].replace('$', ''),  #new
    })

    # Membership tiers: every tier lists the houses of the cheaper tiers too.
    price = float(meta["myprice"].replace(",", ''))
    member_entry = ";" + house['id']
    if price <= 130:
        publicMember = publicMember + member_entry
        goldMember = goldMember + member_entry
        silverMember = silverMember + member_entry
        platinumMember = platinumMember + member_entry
    elif price < 180:
        silverMember = silverMember + member_entry
        goldMember = goldMember + member_entry
        platinumMember = platinumMember + member_entry
    elif price < 250:
        goldMember = goldMember + member_entry
        platinumMember = platinumMember + member_entry
    elif price >= 250:
        platinumMember = platinumMember + member_entry

    meta.update({
        "myproptype": house['property_type'],  #new
        "myroomtype": house['room_type'],  #new
        "mynopersons": house['accommodates'],  #new
        "mybathrooms": house['bathrooms'],  #new
        "mybedrooms": house['bedrooms'],  #new
        "mybeds": house['beds'],  #new
        "mybedtype": house['bed_type'],  #new
        "myneighbourhood": house['neighbourhood_cleansed'],  #new
    })

    # Index the neighbourhood once, and only when its overview has enough
    # text (more than 400 characters).
    area_key = house['neighbourhood_cleansed'] + house['city']
    if area_key not in neigboorhoods and len(
            house['neighborhood_overview']) > 400:
        neigboorhoods[area_key] = "WeHaveIt"
        print("Adding neighboorhood: " + area_key)
        #Add to index
        myneigh = addNeighboorhood({
            'id': area_key,
            'mydescr': house['neighborhood_overview'],
            'name': house['neighbourhood_cleansed'],
            'mycity': house['city'],
            'mycountry': house['country'],
            'mylat': house['latitude'],
            'mylon': house['longitude'],
        })
        push.Add(myneigh)

    meta.update({
        "myamenities": ';'.join(amenities),  #new
        "myvotecount": house['review_scores_rating'],
        "myhost": house['host_name'],  #new
        "objecttype": "House",  #new
        "language": "English",
        "title": house["name"],
    })
    mydoc.ClickableUri = house['listing_url']
    mydoc.Date = house['last_scraped']

    meta.update({
        "mylon": house['longitude'],
        "mylat": house['latitude'],
    })

    # Copy every collected field onto the document, in insertion order.
    for key, value in meta.items():
        mydoc.AddMetadata(key, value)
    return mydoc
Exemple #18
0
def main():
    """Interactive walk-through of document permissions and the security cache.

    The script performs these stages in order, pausing on ``input()`` between
    them so the state can be inspected:

    1. Add the security provider (cascading to "Email Security Provider") and
       delete permissions older than a freshly created ordering id.
    2. Push one test document carrying two permission levels: level one has an
       allowed-users set and a denied-users set, level two an allowed-groups
       set.
    3. Send a batched expansion: group members for HR and RD, e-mail mappings
       for every user, and two identities flagged as deleted.
    4. Send single-call expansions for the two sales users, then for the
       "Everyone" and "SALES" groups.
    5. Remove the "SALES" group identity with a single call.

    The source id, org id and API key are placeholders that must be edited
    before running.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'

    # Push client used for every call below
    push = CoveoPush.Push(source_id, org_id, api_key)

    # Name of our own provider, plus the e-mail provider it cascades to
    provider = "MySecurityProviderTest"
    email_provider = "Email Security Provider"
    cascading = {email_provider: {"name": email_provider, "type": "EMAIL"}}

    # Add the provider, then drop every permission older than a new ordering id
    push.AddSecurityProvider(provider, "EXPANDED", cascading)
    ordering_id = push.CreateOrderingId()
    push.DeletePermissionsOlderThan(provider, ordering_id)
    print("Old ids removed. Updating security cache")
    input("Press any key to continue...")

    # The test document; its content doubles as the quickview
    mydoc = Document('https://myreference&id=TESTMESECURITY')
    content = "<meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'><html><head><title>My First Title</title><style>.datagrid table { border-collapse: collapse; text-align: left; } .datagrid {display:table !important;font: normal 12px/150% Arial, Helvetica, sans-serif; background: #fff; overflow: hidden; border: 1px solid #006699; -webkit-border-radius: 3px; -moz-border-radius: 3px; border-radius: 3px; }.datagrid table td, .datagrid table th { padding: 3px 10px; }.datagrid table thead th {background:-webkit-gradient( linear, left top, left bottom, color-stop(0.05, #006699), color-stop(1, #00557F) );background:-moz-linear-gradient( center top, #006699 5%, #00557F 100% );filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#006699', endColorstr='#00557F');background-color:#006699; color:#FFFFFF; font-size: 15px; font-weight: bold; border-left: 1px solid #0070A8; } .datagrid table thead th:first-child { border: none; }.datagrid table tbody td { color: #00496B; border-left: 1px solid #E1EEF4;font-size: 12px;font-weight: normal; }.datagrid table tbody  tr:nth-child(even)  td { background: #E1EEF4; color: #00496B; }.datagrid table tbody td:first-child { border-left: none; }.datagrid table tbody tr:last-child td { border-bottom: none; }</style></head><body style='Font-family:Arial'><div class='datagrid'><table><tbody><tr><td>FirstName</td><td>Willem</td></tr><tr><td>MiddleName</td><td>Van</td></tr><tr><td>LastName</td><td>Post</td></tr><tr><td>PositionDescription</td><td>VP Engineering</td></tr><tr><td>JobFunction</td><td>CTO</td></tr><tr><td>JobFamily</td><td>Management</td></tr></tbody></table></div></body></html>"
    mydoc.SetContentAndZLibCompress(content)

    # Metadata; rssauthors should be a multi-value field in the Coveo Cloud organization
    mydoc.AddMetadata("connectortype", "CSV")
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])
    mydoc.Title = "THIS IS A TEST"

    # Identities referenced by the document permissions
    users = ["wim", "peter"]            # allowed users
    deniedusers = ["alex", "anne"]      # denied users
    groups = ["HR", "RD", "SALES"]      # allowed groups

    # Two permission levels; the first one holds two sets
    level_one = CoveoPermissions.DocumentPermissionLevel('First')
    allowed_users_set = CoveoPermissions.DocumentPermissionSet('1Set1')
    denied_users_set = CoveoPermissions.DocumentPermissionSet('1Set2')
    allowed_users_set.AllowAnonymous = False
    denied_users_set.AllowAnonymous = False
    level_two = CoveoPermissions.DocumentPermissionLevel('Second')
    allowed_groups_set = CoveoPermissions.DocumentPermissionSet('2Set1')
    allowed_groups_set.AllowAnonymous = False

    # Shorthand for the identity-type constants used from here on
    identity_type = CoveoConstants.Constants.PermissionIdentityType

    # Level one, first set: allowed users
    for name in users:
        allowed_users_set.AddAllowedPermission(
            CoveoPermissions.PermissionIdentity(identity_type.User, provider, name))

    # Level one, second set: denied users
    for name in deniedusers:
        denied_users_set.AddDeniedPermission(
            CoveoPermissions.PermissionIdentity(identity_type.User, provider, name))

    # Level two, only set: allowed groups
    for name in groups:
        allowed_groups_set.AddAllowedPermission(
            CoveoPermissions.PermissionIdentity(identity_type.Group, provider, name))

    # Attach the sets to their levels and the levels to the document
    level_one.AddPermissionSet(allowed_users_set)
    level_one.AddPermissionSet(denied_users_set)
    level_two.AddPermissionSet(allowed_groups_set)
    for level in (level_one, level_two):
        mydoc.Permissions.append(level)

    # Push the document
    push.AddSingleDocument(mydoc)

    # Next: feed expansions/memberships/mappings into the security cache.
    # The identities defined so far are: alex, anne, wim, peter
    expansion = CoveoPermissions.PermissionIdentityExpansion
    usersingroup = ["wimingroup", "peteringroup"]

    # Drop the last group (SALES); it is added later with a single call
    groups.pop()

    push.StartExpansion(provider)
    # Memberships for the remaining groups: HR, RD
    for group in groups:
        # A fresh member list per group, as each call receives its own list
        members = [expansion(identity_type.User, provider, name)
                   for name in usersingroup]
        push.AddExpansionMember(
            expansion(identity_type.Group, provider, group), members, [], [])

    # Map every user id to an e-mail address and to the "Everyone" group
    all_users = users + deniedusers + usersingroup
    for name in all_users:
        mappings = [expansion(identity_type.User, email_provider, name + "@coveo.com")]
        wellknowns = [expansion(identity_type.Group, provider, "Everyone")]
        push.AddExpansionMapping(
            expansion(identity_type.User, provider, name), [], mappings, wellknowns)

    # Flag the removed users as deleted
    for name in ("wimn", "petern"):
        push.AddExpansionDeleted(
            expansion(identity_type.User, provider, name), [], [], [])

    # Close the expansion, which writes the last batch
    push.EndExpansion(provider)

    for line in (
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup, peteringroup",
        " SALES: should not have any members",
        " each user: wim, peter, anne, wimingroup should have also mappings to Email security providers",
    ):
        print(line)
    input("Press any key to continue...")

    # Single calls this time: add the sales users, then the groups holding them
    sales_users = ["wiminsalesgroup", "peterinsalesgroup"]
    members = []
    for name in sales_users:
        mappings = [expansion(identity_type.User, email_provider, name + "@coveo.com")]
        wellknowns = [expansion(identity_type.Group, provider, "Everyone")]
        members.append(expansion(identity_type.User, provider, name))
        push.AddPermissionExpansion(
            provider, expansion(identity_type.User, provider, name), [], mappings, wellknowns)

    # Both groups receive the same member list
    for group in ("Everyone", "SALES"):
        push.AddPermissionExpansion(
            provider, expansion(identity_type.Group, provider, group), members, [], [])

    for line in (
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup, peteringroup",
        " SALES: should have members wiminsalesgroup, peterinsalesgroup",
        " each user: wim, peter, anne, wimingroup should also have mappings to Email security providers",
    ):
        print(line)
    input("Press any key to continue...")

    # Remove one identity: the SALES group
    push.RemovePermissionIdentity(
        provider, expansion(identity_type.Group, provider, "SALES"))
    for line in (
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup,peteringroup",
        " NO wiminsalesgroup,peterinsalesgroup",
        " each user: wim, peter, anne, wimingroup should have also mappings to Email security providers",
    ):
        print(line)
Exemple #19
0
def create_document(pokemon: Pokemon) -> Document:
    """Build the push Document for a single Pokemon.

    The document is created from ``pokemon.link``, titled with
    ``pokemon.name``, and each attribute listed below is copied into a
    metadata entry of the same name.

    Args:
        pokemon: Source record; must expose every attribute named in
            ``metadata_fields`` below.

    Returns:
        The populated Document.
    """
    # Metadata keys match the attribute names; the order is the order of the
    # AddMetadata calls.
    metadata_fields = (
        'name',
        'link',
        'image_link',
        'poke_type',
        'number',
        'generation',
        'height',
        'weight',
        'species',
        'description',
    )

    document = Document(pokemon.link)
    document.Title = pokemon.name
    for field in metadata_fields:
        document.AddMetadata(field, getattr(pokemon, field))
    return document
Exemple #20
0
def create_document(fileloc):
    """Create a push Document whose id is derived from a file location.

    Prints an "Adding: ..." progress line, then returns a new Document
    constructed from ``'https://server?id='`` followed by ``fileloc``.

    Args:
        fileloc: File location string appended to the document URI.

    Returns:
        The newly created Document.
    """
    progress_line = "Adding: " + fileloc
    print(progress_line)

    document_uri = 'https://server?id=' + fileloc
    return Document(document_uri)
Exemple #21
0
def add_document(adres, pc, wpl, gem, prov, opp, bouwjaar, lon, lat, kamers, prijs, counter):
    """Create a push Document for one record, with its metadata attached.

    The document URI is ``'https://myreference&id='`` followed by
    ``str(counter)`` and the file extension is set to ".html". Each value is
    added as metadata, under a key equal to its parameter name, only when
    ``checkEmpty(value)`` returns a truthy result. The title is
    ``adres + " in " + wpl``.

    Args:
        adres, pc, wpl, gem, prov, opp, bouwjaar, lon, lat, kamers, prijs:
            Field values for the record. NOTE(review): the names look like
            Dutch real-estate fields (address, postal code, town,
            municipality, province, area, build year, ...) -- confirm
            against the caller.
        counter: Value used to build the document URI.

    Returns:
        The populated Document.
    """
    # Create new push document
    mydoc = Document('https://myreference&id=' + str(counter))

    # Set the fileextension
    mydoc.FileExtension = ".html"

    # Candidate metadata, listed in the order it is checked and added
    candidates = (
        ("lat", lat),
        ("lon", lon),
        ("adres", adres),
        ("pc", pc),
        ("wpl", wpl),
        ("gem", gem),
        ("prov", prov),
        ("opp", opp),
        ("bouwjaar", bouwjaar),
        ("kamers", kamers),
        ("prijs", prijs),
    )
    for key, value in candidates:
        if checkEmpty(value):
            mydoc.AddMetadata(key, value)

    mydoc.Title = adres + " in " + wpl

    return mydoc