def _post_quickview_html(post):
    """Return the quickview (preview) HTML page for one person record.

    The page holds a title of the form "First Last (JobFunction)" and a
    two-column table with one row per field.  Every value taken from
    ``post`` is HTML-escaped so characters such as ``&`` or ``<`` in the
    data cannot break the markup.

    Args:
        post: Mapping with the string fields ``FirstName``, ``MiddleName``,
            ``LastName``, ``PositionDescription``, ``JobFunction`` and
            ``JobFamily``.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import html

    def cell(key):
        return html.escape(post[key])

    css = (
        ".datagrid table { border-collapse: collapse; text-align: left; } "
        ".datagrid {display:table !important;"
        "font: normal 12px/150% Arial, Helvetica, sans-serif; "
        "background: #fff; overflow: hidden; border: 1px solid #006699; "
        "-webkit-border-radius: 3px; -moz-border-radius: 3px; "
        "border-radius: 3px; }"
        ".datagrid table td, .datagrid table th { padding: 3px 10px; }"
        ".datagrid table thead th {"
        "background:-webkit-gradient( linear, left top, left bottom, "
        "color-stop(0.05, #006699), color-stop(1, #00557F) );"
        "background:-moz-linear-gradient( center top, #006699 5%, "
        "#00557F 100% );"
        "filter:progid:DXImageTransform.Microsoft.gradient("
        "startColorstr='#006699', endColorstr='#00557F');"
        "background-color:#006699; color:#FFFFFF; font-size: 15px; "
        "font-weight: bold; border-left: 1px solid #0070A8; } "
        ".datagrid table thead th:first-child { border: none; }"
        ".datagrid table tbody td { color: #00496B; "
        "border-left: 1px solid #E1EEF4;font-size: 12px;"
        "font-weight: normal; }"
        ".datagrid table tbody tr:nth-child(even) td "
        "{ background: #E1EEF4; color: #00496B; }"
        ".datagrid table tbody td:first-child { border-left: none; }"
        ".datagrid table tbody tr:last-child td { border-bottom: none; }"
    )

    heading = (cell('FirstName') + " " + cell('LastName')
               + " (" + cell('JobFunction') + ")")

    # JobFamily is interpolated like every other field; previously the
    # literal text "post['JobFamily']" ended up in the table.
    rows = "".join(
        "<tr><td>" + key + "</td><td>" + cell(key) + "</td></tr>"
        for key in ('FirstName', 'MiddleName', 'LastName',
                    'PositionDescription', 'JobFunction', 'JobFamily')
    )

    return (
        "<meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' "
        "content='text/html; charset=UTF-16'>"
        "<html><head><title>" + heading + "</title>"
        "<style>" + css + "</style></head>"
        "<body style='Font-family:Arial'><div class='datagrid'>"
        "<table><tbody>" + rows + "</tbody></table></div></body></html>"
    )


def add_document(post):
    """Build the push Document for one person record.

    The document id is derived from ``post['UserName']``; the body is the
    HTML quickview built from the name and job fields.  The function also
    sets the ``connectortype`` and ``rssauthors`` metadata, the date and
    modified date, the title, and a single allowed user permission, then
    prints a short trace line.

    Args:
        post: Mapping with the string fields ``UserName``, ``FirstName``,
            ``MiddleName``, ``LastName``, ``PositionDescription``,
            ``JobFunction`` and ``JobFamily``.

    Returns:
        The populated Document (it is not pushed here).

    Raises:
        KeyError: If one of the fields listed above is missing.
    """
    # Create new push document
    mydoc = Document('https://myreference&id=' + post['UserName'])

    # Build up the quickview/preview (HTML)
    mydoc.SetContentAndZLibCompress(_post_quickview_html(post))

    # Set the file extension
    mydoc.FileExtension = ".html"

    # Set metadata.
    # rssauthors should be set as a multi-value field in your Coveo Cloud
    # organization.
    mydoc.AddMetadata("connectortype", "CSV")
    mydoc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Use one timestamp so the date and the modified date are identical.
    now = datetime.datetime.now()
    mydoc.SetDate(now)
    mydoc.SetModifiedDate(now)

    # The title is metadata, not HTML, so it is left unescaped.
    mydoc.Title = (post['FirstName'] + ' ' + post['LastName']
                   + ' (' + post['JobFunction'] + ')')

    # Set permissions
    user_email = "*****@*****.**"
    myperm = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
    # NOTE(review): the third argument is presumably the allow-anonymous
    # flag - confirm against the SDK.
    mydoc.SetAllowedAndDeniedPermissions([myperm], [], True)

    print('\nUser %s for title "%s"' % (user_email, post['FirstName']))
    return mydoc
def create_document(people):
    """Return a push Document for one record, with an empty HTML body.

    Args:
        people: Mapping that provides an ``id`` entry; its string form
            becomes the ``id`` query parameter of the document URI.

    Returns:
        The new Document with the placeholder content set (not pushed).
    """
    # Document URI for this record
    reference = 'https://myreference?id=' + str(people['id'])

    # Placeholder quickview/preview (HTML)
    placeholder_html = "<HTML><BODY></BODY></HTML>"

    doc = Document(reference)
    doc.SetContentAndZLibCompress(placeholder_html)
    return doc
def addNeighboorhood(neighboorhood):
    """Return a push Document that describes one neighbourhood.

    Args:
        neighboorhood: Mapping with the entries ``id``, ``name``,
            ``mydescr``, ``mycountry``, ``mycity``, ``mylat`` and ``mylon``.
            ``id`` is concatenated onto the document URI, so it must be a
            string.

    Returns:
        The populated Document (not pushed here).  Its title is the
        neighbourhood name and its content is the ``mydescr`` text.
    """
    doc = Document('https://neighboorhood?id=' + neighboorhood['id'])

    # Set the file extension
    doc.FileExtension = ".html"

    name = neighboorhood['name']
    doc.Title = name
    doc.AddMetadata('myneighbourhood', name)

    # These entries are copied one-to-one under the same metadata name.
    for field in ('mydescr', 'mycountry', 'mycity', 'mylat', 'mylon'):
        doc.AddMetadata(field, neighboorhood[field])

    doc.AddMetadata('objecttype', 'Neighboorhood')
    doc.SetContentAndZLibCompress(neighboorhood['mydescr'])
    return doc
def main():
    """Push one hard-coded test document, then delete older documents.

    Steps: create the push client from the placeholder source id / org id /
    API key, take an ordering id, build a document with a static HTML
    quickview, metadata, title and one allowed user permission, push it
    with ``AddSingleDocument`` and finally call ``DeleteOlderThan`` with
    the ordering id taken at the start.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'

    # Setup the push client
    client = CoveoPush.Push(source_id, org_id, api_key)

    # Get a first Ordering Id
    first_ordering_id = client.CreateOrderingId()

    # Create a document
    doc = Document('https://myreference&id=TESTME')

    # Set the content. This will also be available as the quickview.
    css = (
        ".datagrid table { border-collapse: collapse; text-align: left; } "
        ".datagrid {display:table !important;"
        "font: normal 12px/150% Arial, Helvetica, sans-serif; "
        "background: #fff; overflow: hidden; border: 1px solid #006699; "
        "-webkit-border-radius: 3px; -moz-border-radius: 3px; "
        "border-radius: 3px; }"
        ".datagrid table td, .datagrid table th { padding: 3px 10px; }"
        ".datagrid table thead th {"
        "background:-webkit-gradient( linear, left top, left bottom, "
        "color-stop(0.05, #006699), color-stop(1, #00557F) );"
        "background:-moz-linear-gradient( center top, #006699 5%, "
        "#00557F 100% );"
        "filter:progid:DXImageTransform.Microsoft.gradient("
        "startColorstr='#006699', endColorstr='#00557F');"
        "background-color:#006699; color:#FFFFFF; font-size: 15px; "
        "font-weight: bold; border-left: 1px solid #0070A8; } "
        ".datagrid table thead th:first-child { border: none; }"
        ".datagrid table tbody td { color: #00496B; "
        "border-left: 1px solid #E1EEF4;font-size: 12px;"
        "font-weight: normal; }"
        ".datagrid table tbody tr:nth-child(even) td "
        "{ background: #E1EEF4; color: #00496B; }"
        ".datagrid table tbody td:first-child { border-left: none; }"
        ".datagrid table tbody tr:last-child td { border-bottom: none; }"
    )
    sample_fields = (
        ("FirstName", "Willem"),
        ("MiddleName", "Van"),
        ("LastName", "Post"),
        ("PositionDescription", "VP Engineering"),
        ("JobFunction", "CTO"),
        ("JobFamily", "Management"),
    )
    rows = "".join(
        "<tr><td>%s</td><td>%s</td></tr>" % pair for pair in sample_fields)
    quickview = (
        "<meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' "
        "content='text/html; charset=UTF-16'>"
        "<html><head><title>My First Title</title>"
        "<style>" + css + "</style></head>"
        "<body style='Font-family:Arial'><div class='datagrid'>"
        "<table><tbody>" + rows + "</tbody></table></div></body></html>"
    )
    doc.SetContentAndZLibCompress(quickview)

    # Set the metadata.
    # rssauthors should be set as a multi-value field in your Coveo Cloud
    # organization.
    doc.AddMetadata("connectortype", "CSV")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Set the title
    doc.Title = "THIS IS A TEST"

    # Create a permission identity for the user email
    identity = CoveoPermissions.PermissionIdentity(
        CoveoConstants.Constants.PermissionIdentityType.User,
        "",
        "*****@*****.**",
    )

    # Set the permissions on the document; the last argument is the
    # allow-anonymous flag.
    doc.SetAllowedAndDeniedPermissions([identity], [], True)

    # Push the document
    client.AddSingleDocument(doc)

    # Delete older documents
    client.DeleteOlderThan(first_ordering_id)
def main():
    """Run the interactive security-provider sample end to end.

    The sample pauses on ``input()`` three times so the operator can
    inspect the security cache between stages:

    1. Create the security provider and delete older permissions.
    2. Push one document carrying two permission levels, then send a
       batched expansion (group members, user mappings, deleted users).
    3. Add the SALES group and its users with single calls.
    4. Remove the SALES group identity again.
    """
    source_id = '--Enter your source id--'
    org_id = '--Enter your org id--'
    api_key = '--Enter your API key--'

    # Setup the push client
    client = CoveoPush.Push(source_id, org_id, api_key)

    # First set the security provider name
    provider = "MySecurityProviderTest"
    email_provider = "Email Security Provider"

    def doc_identity(kind, name):
        """Permission identity for a document, owned by our provider."""
        return CoveoPermissions.PermissionIdentity(kind, provider, name)

    def expanded_user(name):
        """Expansion identity of a user in our provider."""
        return CoveoPermissions.PermissionIdentityExpansion(
            CoveoConstants.Constants.PermissionIdentityType.User,
            provider, name)

    def expanded_group(name):
        """Expansion identity of a group in our provider."""
        return CoveoPermissions.PermissionIdentityExpansion(
            CoveoConstants.Constants.PermissionIdentityType.Group,
            provider, name)

    def email_mapping(name):
        """Expansion identity mapping a user id to its email address."""
        return CoveoPermissions.PermissionIdentityExpansion(
            CoveoConstants.Constants.PermissionIdentityType.User,
            email_provider, name + "@coveo.com")

    def report(lines):
        for line in lines:
            print(line)

    # Define cascading security provider information and create it
    cascading = {
        email_provider: {
            "name": email_provider,
            "type": "EMAIL",
        },
    }
    client.AddSecurityProvider(provider, "EXPANDED", cascading)

    # Delete all old entries
    first_ordering_id = client.CreateOrderingId()
    client.DeletePermissionsOlderThan(provider, first_ordering_id)
    print("Old ids removed. Updating security cache")
    input("Press any key to continue...")

    # Create a document
    doc = Document('https://myreference&id=TESTMESECURITY')

    # Set the content. This will also be available as quickview for that
    # document.
    css = (
        ".datagrid table { border-collapse: collapse; text-align: left; } "
        ".datagrid {display:table !important;"
        "font: normal 12px/150% Arial, Helvetica, sans-serif; "
        "background: #fff; overflow: hidden; border: 1px solid #006699; "
        "-webkit-border-radius: 3px; -moz-border-radius: 3px; "
        "border-radius: 3px; }"
        ".datagrid table td, .datagrid table th { padding: 3px 10px; }"
        ".datagrid table thead th {"
        "background:-webkit-gradient( linear, left top, left bottom, "
        "color-stop(0.05, #006699), color-stop(1, #00557F) );"
        "background:-moz-linear-gradient( center top, #006699 5%, "
        "#00557F 100% );"
        "filter:progid:DXImageTransform.Microsoft.gradient("
        "startColorstr='#006699', endColorstr='#00557F');"
        "background-color:#006699; color:#FFFFFF; font-size: 15px; "
        "font-weight: bold; border-left: 1px solid #0070A8; } "
        ".datagrid table thead th:first-child { border: none; }"
        ".datagrid table tbody td { color: #00496B; "
        "border-left: 1px solid #E1EEF4;font-size: 12px;"
        "font-weight: normal; }"
        ".datagrid table tbody tr:nth-child(even) td "
        "{ background: #E1EEF4; color: #00496B; }"
        ".datagrid table tbody td:first-child { border-left: none; }"
        ".datagrid table tbody tr:last-child td { border-bottom: none; }"
    )
    sample_fields = (
        ("FirstName", "Willem"),
        ("MiddleName", "Van"),
        ("LastName", "Post"),
        ("PositionDescription", "VP Engineering"),
        ("JobFunction", "CTO"),
        ("JobFamily", "Management"),
    )
    rows = "".join(
        "<tr><td>%s</td><td>%s</td></tr>" % pair for pair in sample_fields)
    quickview = (
        "<meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' "
        "content='text/html; charset=UTF-16'>"
        "<html><head><title>My First Title</title>"
        "<style>" + css + "</style></head>"
        "<body style='Font-family:Arial'><div class='datagrid'>"
        "<table><tbody>" + rows + "</tbody></table></div></body></html>"
    )
    doc.SetContentAndZLibCompress(quickview)

    # Set the metadata.
    # rssauthors should be set as a multi-value field in your Coveo Cloud
    # organization.
    doc.AddMetadata("connectortype", "CSV")
    doc.AddMetadata("rssauthors", ["Coveo", "R&D"])

    # Set the title
    doc.Title = "THIS IS A TEST"

    # Users that should / should not have access, and the allowed groups.
    users = ["wim", "peter"]
    denied_users = ["alex", "anne"]
    groups = ["HR", "RD", "SALES"]

    # Create the permission levels. Each level can include multiple sets.
    first_level = CoveoPermissions.DocumentPermissionLevel('First')
    allowed_set = CoveoPermissions.DocumentPermissionSet('1Set1')
    denied_set = CoveoPermissions.DocumentPermissionSet('1Set2')
    allowed_set.AllowAnonymous = False
    denied_set.AllowAnonymous = False

    second_level = CoveoPermissions.DocumentPermissionLevel('Second')
    group_set = CoveoPermissions.DocumentPermissionSet('2Set1')
    group_set.AllowAnonymous = False

    # Allowed permissions for the first set of the first level
    for name in users:
        allowed_set.AddAllowedPermission(doc_identity(
            CoveoConstants.Constants.PermissionIdentityType.User, name))

    # Denied permissions for the second set of the first level
    for name in denied_users:
        denied_set.AddDeniedPermission(doc_identity(
            CoveoConstants.Constants.PermissionIdentityType.User, name))

    # Allowed permissions for the first set of the second level
    for name in groups:
        group_set.AddAllowedPermission(doc_identity(
            CoveoConstants.Constants.PermissionIdentityType.Group, name))

    # Attach the permission sets to the appropriate level
    first_level.AddPermissionSet(allowed_set)
    first_level.AddPermissionSet(denied_set)
    second_level.AddPermissionSet(group_set)

    # Set the permissions on the document
    doc.Permissions.append(first_level)
    doc.Permissions.append(second_level)

    # Push the document
    client.AddSingleDocument(doc)

    # The expansion/memberships/mappings now also have to be added to the
    # security cache.
    # The previously defined identities were: alex, anne, wim, peter
    group_users = ["wimingroup", "peteringroup"]

    # Remove the last group (SALES), so it can be added later with a
    # single call
    groups.pop()

    client.StartExpansion(provider)

    # Group memberships for: HR, RD
    for group_name in groups:
        members = [expanded_user(name) for name in group_users]
        client.AddExpansionMember(
            expanded_group(group_name), members, [], [])

    # Mappings for all users, from user id to email address
    users.extend(denied_users)
    users.extend(group_users)
    for name in users:
        mappings = [email_mapping(name)]
        wellknowns = [expanded_group("Everyone")]
        client.AddExpansionMapping(
            expanded_user(name), [], mappings, wellknowns)

    # Remove deleted users
    for name in ["wimn", "petern"]:
        # Add each identity to delete to the Deleted list
        client.AddExpansionDeleted(expanded_user(name), [], [], [])

    # End the expansion and write the last batch
    client.EndExpansion(provider)

    report((
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup, peteringroup",
        " SALES: should not have any members",
        " each user: wim, peter, anne, wimingroup should have also mappings to Email security providers",
    ))
    input("Press any key to continue...")

    # Single calls: add the SALES group and its users
    sales_users = ["wiminsalesgroup", "peterinsalesgroup"]
    members = []
    for name in sales_users:
        mappings = [email_mapping(name)]
        wellknowns = [expanded_group("Everyone")]
        members.append(expanded_user(name))
        client.AddPermissionExpansion(
            provider, expanded_user(name), [], mappings, wellknowns)

    client.AddPermissionExpansion(
        provider, expanded_group("Everyone"), members, [], [])
    client.AddPermissionExpansion(
        provider, expanded_group("SALES"), members, [], [])

    report((
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup, peteringroup",
        " SALES: should have members wiminsalesgroup, peterinsalesgroup",
        " each user: wim, peter, anne, wimingroup should also have mappings to Email security providers",
    ))
    input("Press any key to continue...")

    # Remove an identity: group SALES should be removed
    client.RemovePermissionIdentity(provider, expanded_group("SALES"))

    report((
        "Now updating security cache.",
        "Check:",
        " HR/RD groups: members wimingroup,peteringroup",
        " NO wiminsalesgroup,peterinsalesgroup",
        " each user: wim, peter, anne, wimingroup should have also mappings to Email security providers",
    ))
def _house_quickview_html(house):
    """Return the quickview (preview) HTML page for one listing.

    Every value taken from ``house`` is HTML-escaped before it is placed
    in the page.  The "Full Description", "Space" and "Neighborhood"
    sections are only emitted when the matching field is non-empty, and
    the image tag only when ``picture_url`` is non-empty.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import html

    css = (
        " body > div:nth-child(2) { display: none }"
        " [id^=CoveoHighlight] {background-color: white !important;}"
        " .side .title {"
        " color: #000 !important; font-size: 16pt !important;padding-bottom: 15px;"
        " }"
        " .side .host {"
        " display:inline-block;"
        " color: gray;padding-bottom: 15px;"
        " }"
        " .side .city, .side .state, .side .country {"
        " display: inline-block;"
        " padding-left: 10px;"
        " color: gray;"
        " }"
        " .side .header_info {"
        " font-size: 12pt;"
        " }"
        " .side .info {"
        " font-size: 10pt;padding-bottom: 15px;"
        " }"
        " .side .infos {"
        " color: gray;padding-bottom: 15px;display:inline-block;padding-right: 10px;"
        " }"
        " .myimage img, .image img{"
        " box-shadow: 1px 3px 5px 0px gray;"
        " width: 100%;"
        " }"
        " .myimage img {"
        " border-radius: 2px;"
        " max-height:250px;"
        " }"
        " .image img {"
        " border-radius: 4px;"
        " max-height:350px;max-width: 350px;"
        "}"
        " .image {"
        " float: left;"
        " padding: 5px;"
        " padding-right: 15px;"
        " }"
        " .amenities { font-size: 11pt; column-count: 3;max-width: 50%;}"
        " .inf_title { display:inline-block;padding-right: 5px; font-size: 0.8em; font-style: italic;}"
        " .inf_value { display:inline-block;}"
        " body {font-family: 'Verdana', sans-serif !important;}"
        " ul {list-style: none;font-size:11pt}"
        " li {padding-right: 5px}"
        " .host {padding-right: 5px}"
        " .city {padding-right: 5px}"
        " .state {padding-right: 5px}"
        " .country {padding-right: 5px}"
    )

    name = html.escape(house['name'])
    parts = [
        "<html><head><meta charset='UTF-16'>"
        "<meta http-equiv='Content-Type' "
        "content='text/html; charset=UTF-16'>",
        "<style>", css, "</style>",
        # The title belongs inside <head>; it used to follow </head>.
        "<title>" + name + "</title>",
        "</head>",
        "<body>",
        "<div class='header'>",
        "<div class='imageblock'><div class='image'>",
    ]
    if house['picture_url']:
        # html.escape also escapes quotes, so the URL cannot terminate the
        # single-quoted src attribute.
        parts.append(
            " <img class='imageimg' src='" + html.escape(house['picture_url'])
            + "' onerror=\"javascript:this.src='images/emptyHouse.jpg'\">")
    parts.append(
        "</div><div class='side' style='padding-left: 370px;'>"
        "<div class='title'>" + name + "</div>")
    parts.append("<div class='header_info'>Overview</div>")
    parts.append("<div class='info'>" + html.escape(house['summary']) + "</div>")

    optional_sections = (
        ("Full Description", "description"),
        ("Space", "space"),
        ("Neighborhood", "neighborhood_overview"),
    )
    for heading, field in optional_sections:
        if house[field]:
            parts.append("<div class='header_info'>" + heading + "</div>")
            parts.append(
                "<div class='info'>" + html.escape(house[field]) + "</div>")

    # Close the 'side', 'imageblock' and 'header' divs, which were left open.
    parts.append("</div></div></div>")
    parts.append("</body></html>")
    return "".join(parts)


def _membership_tiers(price):
    """Return the ';'-separated membership levels allowed at this price."""
    if price <= 130:
        return "Public;Gold;Silver;Platinum"
    if price < 180:
        return "Gold;Silver;Platinum"
    if price < 250:
        return "Silver;Platinum"
    # Also reached for a NaN price, which used to leave the value unbound.
    return "Platinum"


def _push_house_ratings(house_id, price):
    """Generate random demo ratings for one house and add them to the push.

    NOTE(review): the source this was reconstructed from had lost its
    indentation.  Each ``if`` is assumed to guard exactly one rating, with
    ratings B, D and I always pushed - confirm against the original file.
    """
    def push_rating(suffix, audience, age, rate):
        push.Add(addRating({
            'id': house_id + suffix,
            'house_id': house_id,
            'type': audience,
            'age': age,
            'rate': rate,
        }))

    # One draw decides whether the optional A, E and G ratings are added.
    with_extras = randint(1, 5) > 2

    # The first rating value is drawn whether or not rating A is pushed.
    first_rate = randint(1, 5)
    if with_extras:
        push_rating('A', 'Business', '25-40', first_rate)
    push_rating('B', 'Business', '40-50', randint(1, 5))
    if price > 180:
        push_rating('C', 'Business', '50+', randint(1, 5))

    push_rating('D', 'Family', '25-35', randint(1, 5))
    if with_extras:
        push_rating('E', 'Family', '35-50', randint(1, 5))

    if price <= 180:
        push_rating('F', 'Individual', '20-30', randint(1, 5))
    if with_extras:
        push_rating('G', 'Individual', '30-40', randint(1, 5))
    push_rating('I', 'Individual', '40-50', randint(1, 5))


def _push_neighbourhood_once(house):
    """Push a neighbourhood document the first time a neighbourhood is seen.

    A neighbourhood is keyed by ``neighbourhood_cleansed + city`` and is
    only pushed when the listing's overview is longer than 400 characters.
    """
    key = house['neighbourhood_cleansed'] + house['city']
    if key in neigboorhoods:
        return
    if len(house['neighborhood_overview']) <= 400:
        return

    neigboorhoods[key] = "WeHaveIt"
    print("Adding neighboorhood: " + key)
    # Add to index
    push.Add(addNeighboorhood({
        'id': key,
        'mydescr': house['neighborhood_overview'],
        'name': house['neighbourhood_cleansed'],
        'mycity': house['city'],
        'mycountry': house['country'],
        'mylat': house['latitude'],
        'mylon': house['longitude'],
    }))


def add_document(house):
    """Build the push Document for one listing row.

    Besides building the document, this adds related items to the
    module-level ``push`` object: random demo ratings (unless the
    module-level ``noratings`` flag is set) and, once per neighbourhood,
    a neighbourhood document.

    Args:
        house: Mapping of listing columns to string values.  Columns read:
            ``listing_url``, ``picture_url``, ``name``, ``summary``,
            ``description``, ``space``, ``neighborhood_overview``,
            ``amenities``, ``id``, ``country``, ``city``, ``price``,
            ``property_type``, ``room_type``, ``accommodates``,
            ``bathrooms``, ``bedrooms``, ``beds``, ``bed_type``,
            ``neighbourhood_cleansed``, ``latitude``, ``longitude``,
            ``review_scores_rating``, ``host_name``, ``host_id`` and
            ``last_scraped``.

    Returns:
        The populated Document; the caller is expected to push it.

    Raises:
        KeyError: If one of the columns listed above is missing.
        ValueError: If ``price`` is not a number once ``$`` and ``,`` are
            removed.
    """
    # The listing URL is the unique identifier of the document.
    mydoc = Document(house['listing_url'])

    # Set the file extension
    mydoc.FileExtension = ".html"

    imageurl = house['picture_url'].replace(
        'aki_policy=large', 'aki_policy=medium')
    amenities = (house['amenities']
                 .replace('"', '').replace('{', '').replace('}', '')
                 .split(','))

    # Build up the quickview/preview (HTML)
    mydoc.SetContentAndZLibCompress(_house_quickview_html(house))

    price_text = house['price'].replace('$', '')
    price = float(price_text.replace(',', ''))

    if not noratings:
        _push_house_ratings(house['id'], price)

    # Check if we already have the neighbourhood
    _push_neighbourhood_once(house)

    # Lower-cased URL, used as the clickable URI of the document.
    clickable_uri = (BASE_URL + house['country'] + "/" + house['id']).lower()
    mydoc.ClickableUri = clickable_uri
    mydoc.Date = house['last_scraped']

    # Metadata is sent in the order listed here.  The former 'sitecore*'
    # entries were filtered out before being sent, so they are not stored.
    meta = {
        "connectortype": "Push",
        "mytype": "Houses",
        "myhouseid": house['id'],
        "myimage": imageurl,
        "mycountry": house['country'],
        "mycity": house['city'],
        "myprice": price_text,
        "mymemberships": _membership_tiers(price),
        "myproptype": house['property_type'],
        "myroomtype": house['room_type'],
        "mynopersons": house['accommodates'],
        "mybathrooms": house['bathrooms'],
        "mybedrooms": house['bedrooms'],
        "mybeds": house['beds'],
        "mybathroomsf": house['bathrooms'],
        "mybedroomsf": house['bedrooms'],
        "mybedsf": house['beds'],
        "mybedtype": house['bed_type'],
        "myneighbourhood": house['neighbourhood_cleansed'],
        "myamenities": ';'.join(amenities),
        "myvotecount": house['review_scores_rating'],
        "myhost": house['host_name'],
        "myhostid": house['host_id'],
        "objecttype": "House",
        "language": "English",
        # Two ';'-separated values: the property type alone, and
        # "property type|room type".
        "myroomprop": (house['property_type'] + ";" + house['property_type']
                       + "|" + house['room_type']),
        "title": house['name'],
        "mylon": house['longitude'],
        "mylat": house['latitude'],
    }
    for key, value in meta.items():
        mydoc.AddMetadata(key, value)

    return mydoc
def add_document(house):
    """Build the push ``Document`` for a single house listing.

    ``house`` is one listing row (a mapping of column name to string). The
    columns read here are: listing_url, picture_url, name, host_name, city,
    state, country, summary, description, space, neighborhood_overview,
    amenities, id, price, property_type, room_type, accommodates, bathrooms,
    bedrooms, beds, bed_type, neighbourhood_cleansed, review_scores_rating,
    latitude, longitude and last_scraped.

    Side effects:
      * appends ``";" + house['id']`` to the module-level membership strings
        (publicMember / silverMember / goldMember / platinumMember) selected
        by the listing price;
      * the first time a neighbourhood+city key is seen with an overview
        longer than 400 characters, records it in ``neigboorhoods``, builds a
        neighbourhood document with ``addNeighboorhood`` and adds it to
        ``push``.

    Returns the populated document; its id and clickable URI are both the
    listing URL.
    """
    global goldMember, silverMember, platinumMember, publicMember
    global neigboorhoods, push

    listing_url = house['listing_url']
    mydoc = Document(listing_url)
    mydoc.FileExtension = ".html"

    picture_url = house['picture_url']
    imageurl = picture_url.replace('aki_policy=large', 'aki_policy=medium')
    listing_name = house['name']

    # Quickview/preview HTML, collected as fragments and joined once.
    parts = [
        "<html><head><meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>",
        "</head>",
        "<title>" + listing_name + "</title>",
        "<body>",
        "<div class='header'>",
        "<div class='imageblock'><div class='image'>",
    ]
    if picture_url:
        parts.append(" <img class='imageimg' src='" + picture_url +
                     "' onerror=\"javascript:this.src='images/emptyHouse.jpg'\">")
    parts.append("</div><div class='side'><div class='title'>" + listing_name + "</div>")

    # One <div> per simple field: (css class, column).
    for css_class, column in (("host", "host_name"),
                              ("city", "city"),
                              ("state", "state"),
                              ("country", "country")):
        parts.append("<div class='" + css_class + "'>" + house[column] + "</div>")

    # Headed free-text sections: (heading, column).
    for heading, column in (("Summary", "summary"),
                            ("Full Description", "description"),
                            ("Space", "space"),
                            ("Neighborhood", "neighborhood_overview")):
        parts.append("<div class='header_info'>" + heading + "</div>")
        parts.append("<div class='info'>" + house[column] + "</div>")

    # Amenities come as a comma-separated list wrapped in braces/quotes;
    # drop those three characters and split on the commas.
    amenities = house['amenities'].translate(str.maketrans('', '', '"{}')).split(',')
    parts.append("<div class='header_info'>Amenities</div>")
    parts.append("<div class='amenities'>" + '<br>'.join(amenities) + "</div>")
    parts.append("</body></html>")

    mydoc.SetContentAndZLibCompress("".join(parts))

    house_id = house['id']
    meta = {
        "connectortype": "Push",
        "mytype": "Houses",
        "myhouseid": house_id,
        "myimage": imageurl,
        "mycountry": house['country'],
        "mycity": house['city'],
        "myprice": house["price"].replace('$', ''),
    }

    # Price tiers decide which membership levels list this house.
    price = float(meta["myprice"].replace(",", ''))
    if price <= 130:
        publicMember += ";" + house_id
        goldMember += ";" + house_id
        silverMember += ";" + house_id
        platinumMember += ";" + house_id
    elif price < 180:
        silverMember += ";" + house_id
        goldMember += ";" + house_id
        platinumMember += ";" + house_id
    elif price < 250:
        goldMember += ";" + house_id
        platinumMember += ";" + house_id
    elif price >= 250:
        platinumMember += ";" + house_id

    meta.update({
        "myproptype": house['property_type'],
        "myroomtype": house['room_type'],
        "mynopersons": house['accommodates'],
        "mybathrooms": house['bathrooms'],
        "mybedrooms": house['bedrooms'],
        "mybeds": house['beds'],
        "mybedtype": house['bed_type'],
        "myneighbourhood": house['neighbourhood_cleansed'],
    })

    # Push a neighbourhood document once per neighbourhood+city, and only
    # when the overview text is longer than 400 characters.
    neighbourhood_key = house['neighbourhood_cleansed'] + house['city']
    if (neighbourhood_key not in neigboorhoods
            and len(house['neighborhood_overview']) > 400):
        neigboorhoods[neighbourhood_key] = "WeHaveIt"
        print("Adding neighboorhood: " + neighbourhood_key)
        myneigh = addNeighboorhood({
            'id': neighbourhood_key,
            'mydescr': house['neighborhood_overview'],
            'name': house['neighbourhood_cleansed'],
            'mycity': house['city'],
            'mycountry': house['country'],
            'mylat': house['latitude'],
            'mylon': house['longitude']
        })
        push.Add(myneigh)

    meta.update({
        "myamenities": ';'.join(amenities),
        "myvotecount": house['review_scores_rating'],
        "myhost": house['host_name'],
        "objecttype": "House",
        "language": "English",
        "title": listing_name,
    })
    mydoc.ClickableUri = house['listing_url']
    mydoc.Date = house['last_scraped']
    meta["mylon"] = house['longitude']
    meta["mylat"] = house['latitude']

    for field, value in meta.items():
        mydoc.AddMetadata(field, value)
    return mydoc
def _movie_credit_item_html(name, caption, profile_path):
    """Return one ``<li class='cast'>`` quickview entry for a cast/crew member."""
    if profile_path:
        portrait = ("<img class='castimg' src='https://image.tmdb.org/t/p/w66_and_h66_bestv2"
                    + profile_path + "'>")
    else:
        # No portrait available: the CSS renders a placeholder box instead.
        portrait = "<div class='noimage'></div>"
    return ("<li class='cast'>" + portrait + "<div class='info'><b>" + name
            + "</b><br>" + caption + "<br></div></li>")


def _movie_credit_list_html(items):
    """Wrap credit entries in the ``castlist`` list; no entries gives ''."""
    joined = "".join(items)
    return ("<ol class='castlist'>" + joined + "</ol>") if joined else ""


def _movie_sentiment_number(movie):
    """Fold the four sentiment labels of ``movie`` into one integer score.

    The polarity label gives a base value and a step; the agreement,
    subjectivity and irony labels each add or subtract that step. Labels
    outside the known sets leave the score unchanged.
    """
    base, step = {
        "Strong Positive": (2, -1),
        "Positive": (1, -1),
        "Neutral": (0, 0),
        "Negative": (-1, 1),
        "Strong Negative": (-2, 1),
    }.get(movie["mysentimentvalue"], (0, 0))

    score = base
    adjustments = (
        ("mysentimentagree", {"Disagreement": 1, "Agreement": -1}),
        ("mysentimentsubj", {"Subjective": 1, "Objective": -1}),
        ("mysentimentirony", {"Ironic": -1, "Non-Ironic": 1}),
    )
    for field, signs in adjustments:
        score += signs.get(movie[field], 0) * step
    return score


def add_document(movie, mcountry, mcity, mregion, mlat, mlon, counter):
    """Build the push ``Document`` for one movie at one location.

    Args:
        movie: mapping with the movie details. Keys read here: id, title,
            original_title, overview, tagline, status, release_date, budget,
            revenue, popularity, vote_count, vote_average, poster_path,
            backdrop_path, genres, keywords, spoken_languages, credits
            (cast and crew) and allreviews. Optional keys: imdb_id,
            relatedartist / relatedsongs, and the four mysentiment* labels.
            The mapping is not modified.
        mcountry: stored as the ``mycountry`` metadata.
        mcity: stored as ``mycity`` and appended to the title.
        mregion: not used by this function; kept so existing callers work.
        mlat: latitude; the value -999 means "no location" and suppresses
            both ``mylat`` and ``mylon``.
        mlon: longitude.
        counter: appended to the document id, so the same movie can be
            pushed under several ids.

    Returns:
        The populated document: compressed HTML quickview as content, one
        metadata entry per collected field, the movie page as clickable URI.
    """
    meta = dict()

    document_id = 'https://www.themoviedb.org/movie/' + str(movie['id']) + '/' + str(counter)
    mydoc = Document(document_id)
    mydoc.FileExtension = ".html"

    # Multi-value fields are ';'-terminated strings.
    keyword_names = [keyword['name'] for keyword in movie['keywords']['keywords']]
    genres = "".join(genre['name'].title() + ";" for genre in movie['genres'])
    spoken = "".join(spoke['name'] + ";" for spoke in movie['spoken_languages'])

    # Distinct people across cast and crew, in first-seen order. An exact
    # name match is used: a substring search would drop e.g. "Al" once
    # "Alan" had been seen.
    people = []
    seen_people = set()

    # cast
    casts = ""
    cast_items = []
    for cast in movie['credits']['cast']:
        character = cast['character'].lower().replace(' and ', ' & ').title()
        casts = casts + character + ";"
        if cast['name'] and cast['name'] not in seen_people:
            seen_people.add(cast['name'])
            people.append(cast['name'])
        cast_items.append(
            _movie_credit_item_html(cast['name'], character, cast['profile_path']))
    castsfull = _movie_credit_list_html(cast_items)

    # crews
    crews = ""
    crew_items = []
    for crew in movie['credits']['crew']:
        if crew['name'] and crew['name'] not in seen_people:
            seen_people.add(crew['name'])
            people.append(crew['name'])
        # NOTE(review): every crew record is listed with its job, including
        # people already seen in the cast - confirm this is what the
        # 'mycrews' field is expected to hold.
        crews = crews + crew['name'] + " as " + crew['job'] + ";"
        crew_items.append(
            _movie_credit_item_html(crew['name'], crew['job'], crew['profile_path']))
    crewsfull = _movie_credit_list_html(crew_items)

    allpeople = "".join(name + ";" for name in people)

    # int() truncates, so tiny placeholder popularities (e.g. 1e-06) become 0
    # without touching the caller's dict.
    popularity = int(movie["popularity"])
    profit = movie["revenue"] - movie["budget"]

    # Build up the quickview/preview (HTML) as fragments, joined once.
    parts = [
        "<html><head><meta charset='UTF-16'><meta http-equiv='Content-Type' content='text/html; charset=UTF-16'>",
        "</head>",
        "<title>" + movie['title'] + " (" + mcity + ")</title>",
        "<body>",
        "<style>body { -ms-overflow-style: -ms-autohiding-scrollbar; background-color: #f4f4f4; color: #000; font-family: 'Source Sans Pro', Arial, sans-serif; font-size: 1em; -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale;}",
        " .header { width: 100%; position: relative; z-index: 1;box-sizing:box}",
        " .imageblock { display: inline-flex; background-image: radial-gradient(circle at 20% 50%, rgba(11.76%, 15.29%, 17.25%, 0.98) 0%, rgba(11.76%, 15.29%, 17.25%, 0.88) 100%);}",
    ]
    if movie['backdrop_path']:
        parts.append(
            " .header:before { content: ''; position: absolute; left: 0; right: 0; height:100%; width: 100%; z-index: -1; display: block; filter: opacity(100) grayscale(100%) contrast(130%); background-size: cover; background-repeat: no-repeat; background-position: 50% 50%; background-image: url('https://image.tmdb.org/t/p/w1400_and_h450_bestv2"
            + movie['backdrop_path']
            + "'); will-change: opacity; transition: filter 1s; }")
        meta["mybackdrop"] = movie['backdrop_path']
    parts.extend([
        " .image { padding-left:20px;padding-bottom:20px;padding-top:40px;display: block; width: 300px; height: 450px; position: relative; z-index: 2;}",
        " .imageimg {-webkit-box-shadow: 0px 0px 5px 2px rgba(255,255,255,1);-moz-box-shadow: 0px 0px 5px 2px rgba(255,255,255,1);box-shadow: 0px 0px 5px 2px rgba(255,255,255,1); display: block; width: 300px; height: 450px; border-radius: 4px;}",
        " .side {padding-top:40px;padding-bottom:40px;margin-left: 15px; color: #ffffff; }",
        " .noimage {width: 66px; height: 66px; line-height: 66px; font-size: 33px; display: inline-block; font-family: 'Arial'; text-align: center; background-color: #dbdbdb; color: #b5b5b5; box-sizing: border-box; font-size: 1em; border-radius: 4px; border: 1px solid #d7d7d7;}",
        " .noimage:before { content: \"X\";}",
        " .castlist { list-style-type: none; list-style-position: outside; margin: 0; display: flex; flex-wrap: wrap; justify-content: flex-start;}",
        " .castimg { box-sizing: border-box; line-height: 66px; font-size: 33px; display:inline-block; width: 66px; height: 66px; border-radius: 4px;-webkit-box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);-moz-box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);box-shadow: 2px 2px 3px 1px rgba(128,119,128,1);}",
        " .cast { width: 25%;padding-bottom:10px; }",
        " div.info { display: block; width:60%; align-items: center; padding-top:5px; padding-left: 14px; padding-right: 20px; }",
        " div.title h2 {margin: 0; padding: 0; font-size: 2.4em; line-height: 1.1em; font-weight: 700; display: inline-block;}",
        " .year { padding-left:10px; opacity: 0.6; font-size: 1.7em; font-weight: 400;}",
        " div.title { width: 100%; margin-bottom: 30px;}",
        " div.header_info { width: 100%;}",
        " h3 { font-weight: 600; line-height: 1.3em; font-size: 1.3em; margin-bottom: 8px;}",
        " p.over { font-size: 1em; line-height: 1.4em;-webkit-margin-before: 0.2em !important;}",
        " </style>",
        "<div class='header'>",
        "<div class='imageblock'><div class='image'>",
    ])
    if movie['poster_path']:
        parts.append(" <img class='imageimg' src='https://image.tmdb.org/t/p/w300_and_h450_bestv2"
                     + movie['poster_path'] + "'>")
    parts.extend([
        "</div><div class='side'><div class='title'><h2 style='display:inline-block'>"
        + movie["title"] + "</h2><span class='year'>("
        + movie["release_date"].split('-')[0] + ")</span></div>",
        "<div class='header_info'><h3>Overview</h3></div>",
        "<div><p class='over'>" + movie["overview"] + "</p></div>",
        "<div class='header_info'><h3>Other titles</h3></div>",
        "<div><p class='over'>" + movie["original_title"] + "</p></div>",
    ])
    if movie["tagline"]:
        parts.append("<div class='header_info'><h3>Tagline</h3></div>")
        parts.append("<div><p class='over'>" + movie["tagline"] + "</p></div>")
    parts.extend([
        "</div></div></div>",  # closes side panel, image block and header
        "<div class='header_info'><h3>Cast</h3></div>",
        "<div>" + castsfull + "</div>",
        " <div class='header_info'><h3>Featured Crew</h3></div>",
        "<div>" + crewsfull + "</div>",
    ])

    relatedartist = ""
    relatedsongs = ""
    if 'relatedartist' in movie and movie["relatedartist"]:
        relatedartist = movie["relatedartist"]
        relatedsongs = movie["relatedsongs"]
        parts.append("<div class='header_info'><h3>Soundtrack</h3></div>")
        parts.append("<div><p class='over'>Artists: "
                     + html.unescape(relatedartist).replace(";", " - "))
        parts.append("<br>Songs: "
                     + html.unescape(relatedsongs).replace(";", " - ") + "</p></div>")

    parts.extend([
        "<div class='header_info'><h3>Other info</h3></div>",
        "<div><p class='over'>Status: " + movie["status"],
        "<br>Release date: " + movie["release_date"],
        "<br>Budget: " + '${:0,.2f}'.format(movie["budget"]),
        "<br>Revenue: " + '${:0,.2f}'.format(movie["revenue"]),
        "<br>Profit: " + '${:0,.2f}'.format(profit),
        "<br>Popularity: " + str(popularity),
        "<br>Spoken languages: " + spoken[:-1].replace(";", ' - '),
        "<br>Genres: " + genres[:-1].replace(";", ' - '),
        # Joined with the separator, so no dangling " -" after the last keyword.
        "<br>Keywords: "
        + html.unescape(" - ".join(keyword_names)).replace(";", " - ") + "</p></div>",
    ])
    if 'mysentimentvalue' in movie:
        parts.extend([
            "<div class='header_info'><h3>Sentiment (by MeaningCloud) on Reviews</h3></div>",
            "<div><p class='over'>Sentiment: " + movie["mysentimentvalue"],
            "<br>Agreement: " + movie["mysentimentagree"],
            "<br>Subjectivity: " + movie["mysentimentsubj"],
            "<br>Irony: " + movie["mysentimentirony"] + "</p></div>",
        ])
    parts.append("</body></html>")

    mydoc.SetContentAndZLibCompress("".join(parts))

    # The field is True when reviews exist and an empty string otherwise
    # (kept as-is so the pushed metadata values do not change).
    containsattachment = True if movie["allreviews"] else ""

    meta["connectortype"] = "Push"
    meta["mytype"] = "Movie"
    meta["myimage"] = movie["poster_path"]
    meta["mycountry"] = mcountry
    meta["mycity"] = mcity
    meta["myrevenue"] = movie["revenue"]
    meta["containsattachment"] = containsattachment
    meta["mygenre"] = html.unescape(genres)
    meta["myvotecount"] = movie["vote_count"]
    meta["language"] = "English"
    meta["mystatus"] = movie["status"]
    meta["myrelatedartist"] = html.unescape(relatedartist)
    meta["myrelatedsongs"] = html.unescape(relatedsongs)
    meta["myspokenlang"] = html.unescape(spoken)
    meta["mypeople"] = html.unescape(allpeople)
    meta["mycast"] = html.unescape(casts)
    meta["mycrews"] = crews
    if "imdb_id" in movie:
        meta["myimdb"] = movie["imdb_id"]
    meta["myreviews"] = movie["allreviews"]
    meta["mypopularity"] = popularity
    meta["myvoteaverage"] = movie["vote_average"]
    meta["mybudget"] = movie["budget"]
    meta["myprofit"] = profit
    # Profit in millions, capped at 1000 so outliers cannot dominate ranking.
    meta["myprofitvalue"] = min(profit / 1000000, 1000)
    meta["title"] = movie["title"] + ' (' + mcity + ')'
    mydoc.ClickableUri = 'https://www.themoviedb.org/movie/' + str(movie['id'])
    meta["myid"] = movie['id']
    meta["myvid"] = str(movie['id'])
    meta["myids"] = str(movie['id'])
    mydoc.Date = movie['release_date']

    # sentiment
    if 'mysentimentvalue' in movie:
        meta["mysentimentvalue"] = movie["mysentimentvalue"]
        meta["mysentimentagree"] = movie["mysentimentagree"]
        meta["mysentimentsubj"] = movie["mysentimentsubj"]
        meta["mysentimentirony"] = movie["mysentimentirony"]
        meta["mysentimentnumber"] = _movie_sentiment_number(movie)

    # -999 is the "no location" sentinel. The guard tests the mlat parameter;
    # the previous code referenced an undefined name ``lat``.
    if mlat != -999:
        meta["mylon"] = mlon
        meta["mylat"] = mlat

    for key, value in meta.items():
        mydoc.AddMetadata(key, value)
    return mydoc