def load_trip_plans(mode):
    """Load trip plans selected by ``mode``.

    Args:
        mode: Falsy or ``'all'`` loads every trip plan created by the
            ``GUIDE_USER`` account. ``'ids'`` or ``'urls'`` reads CSV rows
            from ``fileinput.input()`` and uses the first column of each row
            as either a numeric trip plan id (``'ids'``) or a URL whose last
            path segment is the numeric id (``'urls'``).

    Returns:
        The result of the corresponding ``data`` loader, or ``None`` when
        ``mode`` is not one of the recognized values.
    """
    if not mode or mode == 'all':
        db_user = user.User.get_by_email(GUIDE_USER)
        return data.load_all_trip_plans_for_creator(db_user.id)
    if mode not in ('urls', 'ids'):
        return None

    trip_plan_ids = []
    for row in csv.reader(fileinput.input()):
        # csv.reader yields an empty list for a blank line; indexing it
        # would raise IndexError and abort the whole load.
        if not row:
            continue
        id_or_url = row[0]
        if mode == 'urls':
            # Drop any trailing slash first so '.../guide/123/' still yields
            # '123' instead of an empty last segment.
            id_or_url = id_or_url.rstrip('/').split('/')[-1]
        try:
            trip_plan_id = int(id_or_url)
        except ValueError:
            # Header rows and other non-numeric values are skipped. Only
            # ValueError is caught so KeyboardInterrupt/SystemExit propagate.
            continue
        trip_plan_ids.append(trip_plan_id)
    return data.load_trip_plans_by_ids(trip_plan_ids)
def _find_trip_plan_by_source_url(trip_plans, source_url):
    """Return the first plan in ``trip_plans`` with this source_url, or None."""
    for trip_plan in trip_plans:
        if trip_plan.source_url == source_url:
            return trip_plan
    return None


def main(infile):
    """Create a guide for each URL listed in ``infile``.

    Progress and errors are written through ``logprint`` to a timestamped
    ``bulk_guide_creator_<YYYYmmdd-HH-MM-SS>.log`` file in the current
    working directory.

    Args:
        infile: Iterable of lines, one source URL per line. Surrounding
            whitespace is stripped; blank lines are ignored.

    For each URL:
      * URLs for which ``trip_plan_creator.has_parser`` is false are logged
        and skipped.
      * When ``SKIP_IF_GUIDE_EXISTS`` is set, URLs whose canonical form
        matches the ``source_url`` of a known trip plan are logged and
        skipped.
      * Otherwise the URL is parsed through the admin service. A failure is
        logged with its traceback and processing moves on to the next URL.
      * After a successful parse the function sleeps ``SLEEP_TIME_SECS``.
    """
    db_user = user.User.get_by_email(GUIDE_USER)
    assert db_user, 'No user found for GUIDE_USER'
    session_info = data.SessionInfo(db_user=db_user)
    service = serviceimpls.AdminService(session_info)
    log_name = 'bulk_guide_creator_%s.log' % (
        datetime.datetime.now().strftime('%Y%m%d-%H-%M-%S'))

    # The with-block guarantees the log is flushed and closed even when an
    # unexpected exception (or Ctrl-C during the sleep) aborts the run.
    with open(log_name, 'w') as lf:
        all_trip_plans_for_user = data.load_all_trip_plans_for_creator(
            db_user.id)
        for line in infile:
            url = line.strip()
            if not url:
                # Blank lines carry no URL; skip them without log noise.
                continue
            if not trip_plan_creator.has_parser(url):
                logprint(lf, 'Unable to find parser: %s\n-----' % url)
                continue

            if SKIP_IF_GUIDE_EXISTS:
                canonical_url = trip_plan_creator.canonicalize_url(url)
                existing = _find_trip_plan_by_source_url(
                    all_trip_plans_for_user, canonical_url)
                if existing is not None:
                    logprint(lf, 'Trip plan already exists (%d): %s' % (
                        existing.trip_plan_id, url))
                    continue

            logprint(lf, 'Beginning parsing on %s' % url)
            req = serviceimpls.ParseTripPlanRequest(url=url)
            try:
                resp = service.parsetripplan(req)
            except Exception:
                # Best effort: record the failure and keep going with the
                # remaining URLs.
                logprint(lf, 'Error: %s\n%s\n-----' % (
                    url, traceback.format_exc()))
                continue

            logprint(lf, 'Completed parsing %s (%d): "%s"' % (
                url, resp.trip_plan.trip_plan_id, resp.trip_plan.name))
            # Remember the new plan so a later occurrence of the same URL in
            # this input can be matched by the existence check above.
            all_trip_plans_for_user.append(resp.trip_plan)
            logprint(lf, '-----')
            time.sleep(SLEEP_TIME_SECS)
# Entities farther than this from the guide's center count as "weird".
_WEIRD_LOCATION_DISTANCE_METERS = 10000


def _utf8_or_none(text):
    """Return ``text`` encoded as UTF-8, or None when it is empty/None."""
    return text.encode('utf-8') if text else None


def _count_entity_issues(trip_plan):
    """Count entities of ``trip_plan`` with missing or suspicious fields.

    Returns:
        A tuple ``(num_missing_name, num_missing_photos,
        num_missing_location, num_weird_location, num_missing_category,
        num_missing_description)``, in the report's column order.
    """
    num_missing_name = 0
    num_missing_photos = 0
    num_missing_location = 0
    num_weird_location = 0
    num_missing_category = 0
    num_missing_description = 0
    center = trip_plan.location_latlng
    for e in trip_plan.entities:
        if not e.name:
            num_missing_name += 1
        if not e.photo_urls:
            num_missing_photos += 1
        if not e.latlng:
            num_missing_location += 1
        elif center:
            # Distance can only be judged when both the entity and the guide
            # have coordinates.
            distance_from_guide_center = geometry.earth_distance_meters(
                center.lat, center.lng, e.latlng.lat, e.latlng.lng)
            if distance_from_guide_center > _WEIRD_LOCATION_DISTANCE_METERS:
                num_weird_location += 1
        if not e.category or not e.category.category_id:
            num_missing_category += 1
        if not e.description:
            num_missing_description += 1
    return (
        num_missing_name,
        num_missing_photos,
        num_missing_location,
        num_weird_location,
        num_missing_category,
        num_missing_description,
    )


def main():
    """Print a CSV data-quality report of the guide user's trip plans.

    One row is written for every trip plan created by ``GUIDE_USER`` that has
    a ``source_url``; plans without one are skipped. Each row holds the
    plan's identifying fields, its public and admin URLs, 0/1 flags for a
    missing location name, location latlng and cover image, the entity
    count, and the per-entity issue counts. The report is built in memory
    and printed to stdout in one piece.
    """
    db_user = user.User.get_by_email(GUIDE_USER)
    all_trip_plans_for_user = data.load_all_trip_plans_for_creator(db_user.id)
    outfile = cStringIO.StringIO()
    try:
        writer = csv.writer(outfile)
        writer.writerow([
            'name', 'location_name', 'source host', 'source_url',
            'trip plan url', 'admin url',
            'missing_location_name', 'missing_location_latlng',
            'missing_cover_image',
            'num_entities', 'num_missing_name', 'num_missing_photos',
            'num_missing_location', 'num_weird_location',
            'num_missing_category', 'num_missing_description',
        ])
        for trip_plan in all_trip_plans_for_user:
            if not trip_plan.source_url:
                continue
            # Encode the host like the other text columns: the Python 2 csv
            # writer cannot serialize non-ASCII unicode values.
            source_host = urlparse.urlparse(
                trip_plan.source_url).netloc.lower().encode('utf-8')
            row = [
                _utf8_or_none(trip_plan.name),
                _utf8_or_none(trip_plan.location_name),
                source_host,
                # source_url is known to be non-empty here (checked above).
                trip_plan.source_url.encode('utf-8'),
                '%s/guide/%s' % (constants.BASE_URL, trip_plan.trip_plan_id),
                '%s/admin/editor/%s' % (
                    constants.BASE_URL, trip_plan.trip_plan_id),
                0 if trip_plan.location_name else 1,
                0 if trip_plan.location_latlng else 1,
                0 if trip_plan.cover_image_url else 1,
                len(trip_plan.entities),
            ]
            row.extend(_count_entity_issues(trip_plan))
            writer.writerow(row)
        # Parenthesized single argument: prints the same text whether or not
        # print_function is in effect.
        print(outfile.getvalue())
    finally:
        outfile.close()