def load_campaign(filename, campaign=None, limit=None):
    """Make Person records for the ORCID iDs listed in a campaign data file.

    Reads ``data/<filename>`` and handles it one line at a time.  A line is
    either a bare ORCID iD or a comma-separated ``orcid,email,twitter`` row.
    Lines starting with ``#`` are skipped as comments, and lines for which
    ``clean_orcid`` raises ``NoOrcidException`` are skipped with a warning.
    Progress and timing are printed for every row.

    Args:
        filename: name of the file inside the ``data/`` directory.
        campaign: value assigned to ``campaign`` on each newly made person.
        limit: when truthy, only the first ``limit`` lines are processed.

    Raises:
        ValueError: if a row has more than three comma-separated fields.
    """
    with open("data/" + filename, "r") as f:
        lines = f.read().split("\n")

    print("found {} ORCID lines".format(len(lines)))
    print(len(lines))

    if limit:
        lines = lines[:limit]

    total_start = time()
    for row_num, line in enumerate(lines, 1):
        # can have # as comments
        if line.startswith("#"):
            print("skipping comment line")
            continue

        loop_start = time()

        # Pad short rows with None so email and twitter are bound on every
        # row: previously a line without a comma left `twitter` unset (or
        # carrying the previous row's value), and an "orcid,email" row
        # failed on unpacking.  Rows with more than three fields still raise.
        fields = line.split(",")
        fields.extend([None] * (3 - len(fields)))
        (dirty_orcid, email, twitter) = fields

        try:
            orcid_id = clean_orcid(dirty_orcid)
        except NoOrcidException:
            try:
                print(u"\n\nWARNING: no valid orcid_id in line {}; skipping\n\n".format(
                    line))
            except UnicodeDecodeError:
                # formatting the raw line into a unicode message can itself fail
                print(u"\n\nWARNING: no valid orcid_id and line throws UnicodeDecodeError; skipping\n\n")
            continue

        my_person = Person.query.filter_by(orcid_id=orcid_id).first()
        if my_person:
            print(u"row {}, already have person {}, skipping".format(
                row_num, orcid_id))
        else:
            print(u"row {}, making person {}".format(row_num, orcid_id))
            my_person = make_person(orcid_id, high_priority=False)
            # NOTE(review): the original indentation was lost; these updates
            # are assumed to apply to newly made people only, matching the
            # "skipping" message above -- confirm.
            my_person.campaign = campaign
            my_person.email = email
            my_person.twitter = twitter
            db.session.merge(my_person)
            commit_success = safe_commit(db)
            if not commit_success:
                print(u"COMMIT fail on {}".format(my_person.orcid_id))

        print("row {}: finished {} in {}s\n".format(row_num, orcid_id, elapsed(loop_start)))

    print("finished load_campaign on {} profiles in {}s\n".format(
        len(lines), elapsed(total_start)))
def refresh(dirty_orcid):
    """Clean an ORCID iD and refresh the matching profile.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(dirty_orcid))
        raise
    else:
        # only reached when the id was cleaned successfully
        refresh_profile(clean_id)
def load_campaign(filename, campaign=None, limit=None):
    """Make Person records for the ORCID iDs listed in a campaign data file.

    Reads ``data/<filename>`` and handles it one line at a time.  A line is
    either a bare ORCID iD or a comma-separated ``orcid,email,twitter`` row.
    Lines starting with ``#`` are skipped as comments, and lines for which
    ``clean_orcid`` raises ``NoOrcidException`` are skipped with a warning.
    Progress and timing are printed for every row.

    Args:
        filename: name of the file inside the ``data/`` directory.
        campaign: value assigned to ``campaign`` on each newly made person.
        limit: when truthy, only the first ``limit`` lines are processed.

    Raises:
        ValueError: if a row has more than three comma-separated fields.
    """
    with open("data/" + filename, "r") as f:
        lines = f.read().split("\n")

    print("found {} ORCID lines".format(len(lines)))
    print(len(lines))

    if limit:
        lines = lines[:limit]

    total_start = time()
    for row_num, line in enumerate(lines, 1):
        # can have # as comments
        if line.startswith("#"):
            print("skipping comment line")
            continue

        loop_start = time()

        # Pad short rows with None so email and twitter are bound on every
        # row: previously a line without a comma left `twitter` unset (or
        # carrying the previous row's value), and an "orcid,email" row
        # failed on unpacking.  Rows with more than three fields still raise.
        fields = line.split(",")
        fields.extend([None] * (3 - len(fields)))
        (dirty_orcid, email, twitter) = fields

        try:
            orcid_id = clean_orcid(dirty_orcid)
        except NoOrcidException:
            try:
                print(u"\n\nWARNING: no valid orcid_id in line {}; skipping\n\n".format(line))
            except UnicodeDecodeError:
                # formatting the raw line into a unicode message can itself fail
                print(u"\n\nWARNING: no valid orcid_id and line throws UnicodeDecodeError; skipping\n\n")
            continue

        my_person = Person.query.filter_by(orcid_id=orcid_id).first()
        if my_person:
            print(u"row {}, already have person {}, skipping".format(row_num, orcid_id))
        else:
            print(u"row {}, making person {}".format(row_num, orcid_id))
            my_person = make_person(orcid_id, store_in_db=True)
            # NOTE(review): the original indentation was lost; these updates
            # are assumed to apply to newly made people only, matching the
            # "skipping" message above -- confirm.
            my_person.campaign = campaign
            my_person.email = email
            my_person.twitter = twitter
            db.session.merge(my_person)
            commit_success = safe_commit(db)
            if not commit_success:
                print(u"COMMIT fail on {}".format(my_person.orcid_id))

        print("row {}: finished {} in {}s\n".format(row_num, orcid_id, elapsed(loop_start)))

    print("finished load_campaign on {} profiles in {}s\n".format(len(lines), elapsed(total_start)))
def refresh(dirty_orcid):
    """Clean an ORCID iD and refresh the matching profile.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(
            dirty_orcid))
        raise
    else:
        # only reached when the id was cleaned successfully
        refresh_profile(clean_id)
def make_person(dirty_orcid_id, high_priority=False):
    """Build a new Person for an ORCID iD, refresh it and commit it.

    Args:
        dirty_orcid_id: raw ORCID value, passed to ``clean_orcid``.
        high_priority: forwarded to ``Person.refresh``.

    Returns:
        The newly made Person.

    Raises:
        OrcidDoesNotExist: if the person's ``invalid_orcid`` is truthy once
            the refresh and commit attempt are done.
    """
    clean_id = clean_orcid(dirty_orcid_id)

    person = Person(orcid_id=clean_id)
    db.session.add(person)
    print(u"\nin make_person: made new person for {}".format(clean_id))

    person.refresh(high_priority=high_priority)

    # a failed commit is reported but does not stop the function
    if not safe_commit(db):
        print(u"COMMIT fail on {}".format(clean_id))

    if person.invalid_orcid:
        raise OrcidDoesNotExist
    return person
def create_person(dirty_orcid, campaign=None):
    """Add or overwrite the Person for an ORCID iD, optionally tagging a campaign.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.
        campaign: when truthy, stored on the person's ``campaign`` and
            committed.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(dirty_orcid))
        raise

    person = add_or_overwrite_person_from_orcid_id(clean_id, high_priority=False)

    # NOTE(review): the original indentation was lost; the add/commit is
    # assumed to happen only when a campaign is given -- confirm.
    if not campaign:
        return

    person.campaign = campaign
    db.session.add(person)
    if not safe_commit(db):
        print(u"ERROR! committing {}".format(person.orcid_id))
def just_add_twitter(filename, limit=None, create=True):
    """Set the twitter handle on existing Person records from a data file.

    Reads ``data/<filename>`` and handles it one line at a time.  A line is
    either a bare ORCID iD or a comma-separated ``orcid,email,twitter`` row.
    Any ``@`` is removed from the handle before it is stored.  Lines for
    which ``clean_orcid`` raises ``NoOrcidException`` are skipped with a
    warning, and ORCID iDs with no matching Person are only reported.

    Args:
        filename: name of the file inside the ``data/`` directory.
        limit: when truthy, only the first ``limit`` lines are processed.
        create: not used by this function; kept so existing callers that
            pass it keep working.

    Raises:
        ValueError: if a row has more than three comma-separated fields.
    """
    with open("data/" + filename, "r") as f:
        lines = f.read().split("\n")
    print("found {} ORCID lines".format(len(lines)))

    if limit:
        lines = lines[:limit]

    total_start = time()
    for line in lines:
        # Pad short rows with None so a bare id or an "orcid,email" row
        # parses instead of failing on unpacking.  Rows with more than three
        # fields still raise.  The email column is read but not used here.
        fields = line.split(",")
        fields.extend([None] * (3 - len(fields)))
        (dirty_orcid, _email, twitter) = fields

        if twitter:
            twitter = twitter.replace("@", "")

        try:
            orcid_id = clean_orcid(dirty_orcid)
        except NoOrcidException:
            try:
                print(u"\n\nWARNING: no valid orcid_id in line {}; skipping\n\n".format(
                    line))
            except UnicodeDecodeError:
                # formatting the raw line into a unicode message can itself fail
                print(u"\n\nWARNING: no valid orcid_id and line throws UnicodeDecodeError; skipping\n\n")
            continue

        my_person = Person.query.filter_by(orcid_id=orcid_id).first()
        if not my_person:
            print(u"no person found with id {}".format(orcid_id))
            continue

        my_person.twitter = twitter
        db.session.merge(my_person)
        # report success only when the commit went through; the success
        # line used to be printed after a failed commit as well
        if safe_commit(db):
            print(u"added twitter {} to {}".format(twitter, orcid_id))
        else:
            print(u"COMMIT fail on {}".format(orcid_id))

    print("loaded {} profiles in {}s\n".format(len(lines), elapsed(total_start)))
def just_add_twitter(filename, limit=None, create=True):
    """Set the twitter handle on existing Person records from a data file.

    Reads ``data/<filename>`` and handles it one line at a time.  A line is
    either a bare ORCID iD or a comma-separated ``orcid,email,twitter`` row.
    Any ``@`` is removed from the handle before it is stored.  Lines for
    which ``clean_orcid`` raises ``NoOrcidException`` are skipped with a
    warning, and ORCID iDs with no matching Person are only reported.

    Args:
        filename: name of the file inside the ``data/`` directory.
        limit: when truthy, only the first ``limit`` lines are processed.
        create: not used by this function; kept so existing callers that
            pass it keep working.

    Raises:
        ValueError: if a row has more than three comma-separated fields.
    """
    with open("data/" + filename, "r") as f:
        lines = f.read().split("\n")
    print("found {} ORCID lines".format(len(lines)))

    if limit:
        lines = lines[:limit]

    total_start = time()
    for line in lines:
        # Pad short rows with None so a bare id or an "orcid,email" row
        # parses instead of failing on unpacking.  Rows with more than three
        # fields still raise.  The email column is read but not used here.
        fields = line.split(",")
        fields.extend([None] * (3 - len(fields)))
        (dirty_orcid, _email, twitter) = fields

        if twitter:
            twitter = twitter.replace("@", "")

        try:
            orcid_id = clean_orcid(dirty_orcid)
        except NoOrcidException:
            try:
                print(u"\n\nWARNING: no valid orcid_id in line {}; skipping\n\n".format(line))
            except UnicodeDecodeError:
                # formatting the raw line into a unicode message can itself fail
                print(u"\n\nWARNING: no valid orcid_id and line throws UnicodeDecodeError; skipping\n\n")
            continue

        my_person = Person.query.filter_by(orcid_id=orcid_id).first()
        if not my_person:
            print(u"no person found with id {}".format(orcid_id))
            continue

        my_person.twitter = twitter
        db.session.merge(my_person)
        # report success only when the commit went through; the success
        # line used to be printed after a failed commit as well
        if safe_commit(db):
            print(u"added twitter {} to {}".format(twitter, orcid_id))
        else:
            print(u"COMMIT fail on {}".format(orcid_id))

    print("loaded {} profiles in {}s\n".format(len(lines), elapsed(total_start)))
def create_person(dirty_orcid, campaign=None):
    """Add or overwrite the Person for an ORCID iD, optionally tagging a campaign.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.
        campaign: when truthy, stored on the person's ``campaign`` and
            committed.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(
            dirty_orcid))
        raise

    person = add_or_overwrite_person_from_orcid_id(clean_id, high_priority=False)

    # NOTE(review): the original indentation was lost; the add/commit is
    # assumed to happen only when a campaign is given -- confirm.
    if not campaign:
        return

    person.campaign = campaign
    db.session.add(person)
    if not safe_commit(db):
        print(u"ERROR! committing {}".format(person.orcid_id))
def create_person(dirty_orcid, campaign=None, store_in_db=False):
    """Make a Person for an ORCID iD, with or without storing it in the db.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.
        campaign: when truthy and ``store_in_db`` is set, stored on the
            person's ``campaign`` and committed.
        store_in_db: forwarded to ``make_person``; when falsy the person is
            only made and printed.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(dirty_orcid))
        raise

    if not store_in_db:
        print(u"NOT storing in db")
        person = make_person(clean_id, store_in_db=False)
        # NOTE(review): the original indentation was lost; this print is
        # assumed to belong to the not-storing branch only -- confirm.
        print(person)
        return

    print(u"storing in db")
    person = make_person(clean_id, store_in_db=True)
    # NOTE(review): the add/commit is assumed to be nested under the campaign
    # check -- confirm against the original layout.
    if campaign:
        person.campaign = campaign
        db.session.add(person)
        if not safe_commit(db):
            print(u"ERROR! committing {}".format(person.orcid_id))
def create_person(dirty_orcid, campaign=None, store_in_db=False):
    """Make a Person for an ORCID iD, with or without storing it in the db.

    Args:
        dirty_orcid: raw ORCID value, passed to ``clean_orcid``.
        campaign: when truthy and ``store_in_db`` is set, stored on the
            person's ``campaign`` and committed.
        store_in_db: forwarded to ``make_person``; when falsy the person is
            only made and printed.

    Raises:
        NoOrcidException: re-raised, after printing a warning, when
            ``clean_orcid`` rejects ``dirty_orcid``.
    """
    try:
        clean_id = clean_orcid(dirty_orcid)
    except NoOrcidException:
        print(u"\n\nWARNING: no valid orcid_id in {}; skipping\n\n".format(
            dirty_orcid))
        raise

    if not store_in_db:
        print(u"NOT storing in db")
        person = make_person(clean_id, store_in_db=False)
        # NOTE(review): the original indentation was lost; this print is
        # assumed to belong to the not-storing branch only -- confirm.
        print(person)
        return

    print(u"storing in db")
    person = make_person(clean_id, store_in_db=True)
    # NOTE(review): the add/commit is assumed to be nested under the campaign
    # check -- confirm against the original layout.
    if campaign:
        person.campaign = campaign
        db.session.add(person)
        if not safe_commit(db):
            print(u"ERROR! committing {}".format(person.orcid_id))