def create_tasks(engine):
    """Push one pyBossa task per usable row of the 'representative' table.

    The task presenter template is read from the Flask resources and stored
    on the app returned by ``setup()`` before the app is updated. Existing
    tasks (at most 30000 are requested) are indexed by the ``signature``
    stored in their ``info``. Every representative whose ``networking`` text
    has at least three characters after stripping gets a SHA-1 hex signature
    built from ``identification_code`` + ``networking``; a task with that
    signature is updated in place, otherwise a new task is created.

    :param engine: database handle passed through to the ``sl`` helpers.
    """
    log.info("Updating tasks on pyBossa...")
    app = setup()
    with flask_app.open_resource('resources/pbnetworks_template.html') as template:
        app.info['task_presenter'] = template.read()
    pbclient.update_app(app)

    # Index the tasks already on the server by the signature in their info.
    tasks_by_signature = {
        task.data.get('info').get('signature'): task
        for task in pbclient.get_tasks(app.id, limit=30000)
    }

    representatives = sl.get_table(engine, 'representative')
    for rep in sl.all(engine, representatives):
        networking = rep.get('networking')
        # Skip rows with no networking text, or text too short to be useful.
        if networking is None:
            continue
        if len(networking.strip()) < 3:
            continue

        # Non-ASCII characters are dropped before hashing.
        raw_signature = rep.get('identification_code') + networking
        signature = sha1(raw_signature.encode('ascii', 'ignore')).hexdigest()
        rep['signature'] = signature

        # Parenthesised so it prints the same one-element list under the
        # Python 2 print statement.
        print([rep.get('name')])
        log.debug("Task: %s", rep['name'])

        # Dates are replaced by their ISO 8601 string form before upload.
        for date_field in ('last_update_date', 'registration_date'):
            rep[date_field] = rep[date_field].isoformat()

        if signature not in tasks_by_signature:
            pbclient.create_task(app.id, rep)
            continue

        task = tasks_by_signature[signature]
        task.data['info'] = rep
        pbclient.update_task(task)
def get_countries():
    """Return the module-level ``COUNTRIES`` list, filling it on first use.

    While ``COUNTRIES`` is empty, ``resources/countries.csv`` is read through
    ``csv.DictReader``. Every value of each row is decoded from UTF-8, the
    ``euname`` entry is lower-cased and stripped (empty string when the
    column is absent), and the row is appended to ``COUNTRIES``.

    :return: the shared ``COUNTRIES`` list itself, not a copy.
    """
    # Already populated by an earlier call: reuse the cached rows.
    if COUNTRIES:
        return COUNTRIES

    with app.open_resource('resources/countries.csv') as csv_file:
        for raw_row in csv.DictReader(csv_file):
            country = {
                key: value.decode('utf-8') for key, value in raw_row.items()
            }
            country['euname'] = country.get('euname', '').lower().strip()
            COUNTRIES.append(country)
    return COUNTRIES
def extract_data(engine):
    """Load every row of the unregistered-companies CSV via ``load_rep``.

    The ``situation:unregistered`` tag is upserted into the 'tag' table and
    then read back with ``sl.find_one``. Each CSV row is handed to
    ``load_rep`` together with the engine and that tag row. Progress is
    logged on every 100th row, starting with row 0.

    :param engine: database handle passed through to ``sl`` and ``load_rep``.
    """
    log.info("Extracting unregistered interests data...")

    tag_name = 'situation:unregistered'
    sl.upsert(engine, sl.get_table(engine, 'tag'), {'tag': tag_name}, ['tag'])
    # Read the row back so load_rep receives the stored tag record.
    unregistered_tag = sl.find_one(
        engine, sl.get_table(engine, 'tag'), tag=tag_name)

    with app.open_resource('resources/unregistered-companies.csv') as source:
        rows = csv.reader(source, delimiter=',', quotechar='"')
        for index, row in enumerate(rows):
            load_rep(row, engine, unregistered_tag)
            if not index % 100:
                log.info("Extracted: %s...", index)