def _process_crossref_name(self, contributor):
    """Build the Crossref name fields for a single contributor.

    Adapted from logic used in `api/citations/utils.py`.

    When the contributor has both a family and a given name, the stored
    given/middle/family/suffix values are used; otherwise the parts are
    taken from ``impute_names(contributor.fullname)``.

    :param contributor: object exposing ``given_name``, ``middle_names``,
        ``family_name``, ``suffix`` and ``fullname``.
    :return: dict that always contains ``'surname'`` and, when available,
        ``'given_name'`` and ``'suffix'``; each value is cut to its
        ``CROSSREF_*_LIMIT`` and then stripped.
    """
    # If the user has a family and given name, use those
    if contributor.family_name and contributor.given_name:
        given = contributor.given_name
        middle = contributor.middle_names
        family = contributor.family_name
        suffix = contributor.suffix
    else:
        names = impute_names(contributor.fullname)
        given = names.get('given')
        middle = names.get('middle')
        family = names.get('family')
        suffix = names.get('suffix')

    # Any part may be None (``.get`` on a missing key, or an unset field).
    # Joining only the non-empty parts avoids the TypeError ``str.join``
    # raises on None and gives the same text as join-then-strip otherwise.
    given_name = ' '.join(part for part in (given, middle) if part).strip()
    given_stripped = remove_control_characters(given_name)
    # For crossref, given_name is not allowed to have numbers or question marks
    given_processed = ''.join(
        char for char in given_stripped if not (char.isdigit() or char == '?')
    )
    # A missing family name is handled as an empty one so the fallbacks
    # below (given name, then full name) can take over.
    surname_processed = remove_control_characters(family or '')

    surname = surname_processed or given_processed or contributor.fullname
    processed_names = {'surname': surname[:CROSSREF_SURNAME_LIMIT].strip()}
    # A given name is only reported alongside a real surname; when the
    # given name was promoted to surname above it must not be repeated.
    if given_processed and surname_processed:
        processed_names['given_name'] = given_processed[:CROSSREF_GIVEN_NAME_LIMIT].strip()
    if suffix and (surname_processed or given_processed):
        processed_names['suffix'] = suffix[:CROSSREF_SUFFIX_LIMIT].strip()
    return processed_names
def _process_crossref_name(self, contributor):
    """Return the name parts of ``contributor`` in the shape Crossref expects.

    Adapted from logic used in `api/citations/utils.py`.

    :param contributor: object exposing ``given_name``, ``middle_names``,
        ``family_name``, ``suffix`` and ``fullname``.
    :return: dict with a ``'surname'`` entry and optional ``'given_name'``
        and ``'suffix'`` entries, each sliced to the matching
        ``CROSSREF_*_LIMIT`` constant and stripped of outer whitespace.
    """
    # If the user has a family and given name, use those; otherwise parse
    # the full name.
    if contributor.family_name and contributor.given_name:
        given, middle, family, suffix = (
            contributor.given_name,
            contributor.middle_names,
            contributor.family_name,
            contributor.suffix,
        )
    else:
        imputed = impute_names(contributor.fullname)
        given, middle, family, suffix = [
            imputed.get(key) for key in ('given', 'middle', 'family', 'suffix')
        ]

    # ``filter(None, ...)`` drops None/empty parts: ``str.join`` would raise
    # TypeError on None, and for plain strings the stripped result is the
    # same as joining both parts unconditionally.
    given_name = ' '.join(filter(None, (given, middle))).strip()
    given_stripped = remove_control_characters(given_name)

    # For crossref, given_name is not allowed to have numbers or question marks
    allowed_chars = []
    for char in given_stripped:
        if char.isdigit() or char == '?':
            continue
        allowed_chars.append(char)
    given_processed = ''.join(allowed_chars)

    # Treat a missing (None) family name as empty instead of passing None on.
    surname_processed = remove_control_characters(family if family else '')

    # Surname falls back to the given name, then to the raw full name.
    surname = surname_processed or given_processed or contributor.fullname
    processed_names = {'surname': surname[:CROSSREF_SURNAME_LIMIT].strip()}

    if given_processed and surname_processed:
        trimmed_given = given_processed[:CROSSREF_GIVEN_NAME_LIMIT]
        processed_names['given_name'] = trimmed_given.strip()

    if suffix and (surname_processed or given_processed):
        processed_names['suffix'] = suffix[:CROSSREF_SUFFIX_LIMIT].strip()

    return processed_names
def update_guessed_names(self):
    """Update the CSL name fields inferred from the full name.

    Parses ``self.fullname`` with ``utils.impute_names`` and copies the
    ``given``, ``middle``, ``family`` and ``suffix`` entries onto
    ``given_name``, ``middle_names``, ``family_name`` and ``suffix``.
    """
    name_parts = utils.impute_names(self.fullname)
    # (attribute on self, key in the parsed result), assigned in this order.
    attribute_sources = (
        ('given_name', 'given'),
        ('middle_names', 'middle'),
        ('family_name', 'family'),
        ('suffix', 'suffix'),
    )
    for attribute, key in attribute_sources:
        setattr(self, attribute, name_parts[key])
def update_guessed_names(self):
    """Re-derive the CSL name fields from ``self.fullname``.

    The full name is run through ``utils.impute_names`` and the resulting
    parts overwrite ``given_name``, ``middle_names``, ``family_name`` and
    ``suffix`` on this object.
    """
    guessed = utils.impute_names(self.fullname)
    targets = ("given_name", "middle_names", "family_name", "suffix")
    sources = ("given", "middle", "family", "suffix")
    # Walk both tuples in lockstep so each field is looked up and assigned
    # one at a time, in declaration order.
    for target, source in zip(targets, sources):
        setattr(self, target, guessed[source])
def impute_names():
    """Fill in unset name-part attributes on every user from their full name.

    For each user returned by ``models.User.find()``, the full name is
    parsed and every parsed field whose attribute is missing or None on the
    user is set to the parsed value; the user is then saved.
    """
    # This function has the same name as the name parser it needs, so a bare
    # ``impute_names(user.fullname)`` here would call this zero-argument
    # function and raise TypeError. Import the parser under an alias.
    from framework.auth.utils import impute_names as parse_name_parts

    for user in models.User.find():
        parsed = parse_name_parts(user.fullname)
        # NOTE(review): this assumes the parser's keys are the user's
        # attribute names -- confirm against the parser's return value.
        for field, value in parsed.items():
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()
def test_process_crossref_name(self, crossref_client):
    """Check ``_process_crossref_name`` on a series of contributor names.

    One contributor is reused and mutated step by step, so the order of the
    scenarios below matters.
    """
    contributor = AuthUserFactory()

    # Given name and no family name
    contributor.given_name = 'Hey'
    contributor.family_name = ''
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    imputed_names = impute_names(contributor.fullname)
    assert meta == {'surname': imputed_names['family'], 'given_name': imputed_names['given']}

    # Just one name
    contributor.fullname = 'Ke$ha'
    contributor.given_name = ''
    contributor.family_name = ''
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    assert meta == {'surname': contributor.fullname}

    # Number and ? in given name
    contributor.fullname = 'Scotty2Hotty? Ronald Garland II'
    contributor.given_name = ''
    contributor.family_name = ''
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    assert meta == {'given_name': 'ScottyHotty Ronald', 'surname': 'Garland', 'suffix': 'II'}

    # Long suffix is truncated to CROSSREF_SUFFIX_LIMIT characters
    long_suffix = 'PhD MD Esq MPH IV'
    contributor.given_name = 'Mark'
    contributor.family_name = 'Henry'
    contributor.suffix = long_suffix
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    assert meta['suffix'] == long_suffix[:crossref.CROSSREF_SUFFIX_LIMIT]

    # Long given_names and surnames are truncated to limit
    long_given = 'Maaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaark'
    long_surname = 'Henryyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'
    contributor.given_name = long_given
    contributor.family_name = long_surname
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    assert meta['given_name'] == long_given[:crossref.CROSSREF_GIVEN_NAME_LIMIT]
    assert meta['surname'] == long_surname[:crossref.CROSSREF_SURNAME_LIMIT]

    # Unparsable given or surname just returns fullname as surname.
    # The literal must stay on one physical line: a single-quoted string
    # cannot contain a raw line break.
    unparsable_fullname = 'Author (name withheld until double-blind peer review completes and this name is also really long)'
    contributor.given_name = ''
    contributor.family_name = ''
    contributor.fullname = unparsable_fullname
    contributor.save()
    meta = crossref_client._process_crossref_name(contributor)
    assert meta == {'surname': unparsable_fullname[:crossref.CROSSREF_SURNAME_LIMIT]}
def parse(self, csv_file):
    """Read workshop rows from a CSV source and append OSF activity columns.

    The first row is treated as the header and gets four extra column
    titles. Every later row is matched to a user (by the e-mail in column 5,
    then by the full name in column 4, then by the imputed family name) and
    extended with the user's pk, log count, node count and last log date
    since the workshop date in column 1 (``%m/%d/%y``).

    :param csv_file: Comma separated input accepted by ``csv.reader``
    :return: A list of the (extended) rows, header first
    """
    output = []
    rows = csv.reader(csv_file)

    header = next(rows, None)
    if header is None:
        # Nothing to read at all.
        return output
    header.extend([
        'OSF ID', 'Logs Since Workshop', 'Nodes Created Since Workshop',
        'Last Log Date'
    ])
    output.append(header)

    for row in rows:
        user = self.find_user_by_email(row[5])
        if not user:
            full_name = row[4]
            try:
                family_name = impute_names(full_name)['family']
            except UnicodeDecodeError:
                row.extend(['Unable to parse name'])
                output.append(row)
                continue

            user = self.find_user_by_full_name(full_name)
            if not user:
                user = self.find_user_by_family_name(family_name)
            if not user:
                # No match at all: emit placeholder values.
                row.extend(['', 0, 0, ''])
                output.append(row)
                continue

        workshop_date = datetime.strptime(row[1], '%m/%d/%y')
        nodes = self.get_user_nodes_since_workshop(user, workshop_date)
        user_logs = self.get_user_logs_since_workshop(user, workshop_date)
        try:
            last_log_date = user_logs[-1].date.strftime('%m/%d/%y')
        except IndexError:
            # No logs since the workshop.
            last_log_date = ''

        row.extend([user.pk, len(user_logs), len(nodes), last_log_date])
        output.append(row)

    return output
def parse(self, csv_file):
    """Parse a workshop CSV and add OSF usage columns to every row.

    Row 0 is the header and receives the new column titles. Each other row
    is resolved to a user via e-mail (column 5) or name (column 4) and gets
    the user's pk, number of logs, number of nodes and last log date since
    the workshop date found in column 1.

    :param csv_file: Comma separated input accepted by ``csv.reader``
    :return: A list of rows, each extended in place
    """
    parsed_rows = []
    for line_number, row in enumerate(csv.reader(csv_file)):
        if line_number == 0:
            appended = [
                'OSF ID', 'Logs Since Workshop',
                'Nodes Created Since Workshop', 'Last Log Date'
            ]
        else:
            # ``appended`` stays None until one of the outcomes is decided.
            appended = None
            user = self.find_user_by_email(row[5])
            if not user:
                full_name = row[4]
                try:
                    family_name = impute_names(full_name)['family']
                except UnicodeDecodeError:
                    appended = ['Unable to parse name']
                else:
                    user = (self.find_user_by_full_name(full_name)
                            or self.find_user_by_family_name(family_name))
                    if not user:
                        appended = ['', 0, 0, '']

            if appended is None:
                workshop_date = datetime.strptime(row[1], '%m/%d/%y')
                nodes = self.get_user_nodes_since_workshop(user, workshop_date)
                user_logs = self.get_user_logs_since_workshop(user, workshop_date)
                try:
                    last_log_date = user_logs[-1].date.strftime('%m/%d/%y')
                except IndexError:
                    last_log_date = ''
                appended = [user.pk, len(user_logs), len(nodes), last_log_date]

        row.extend(appended)
        parsed_rows.append(row)
    return parsed_rows
def create_fake_user():
    """Create, save and return a user populated with generated data.

    The e-mail doubles as the username, the generated full name is split
    with ``utils.impute_names`` and the parts are passed to ``UserFactory``
    as extra keyword arguments. The user is flagged registered and claimed,
    gets the password ``faker123`` and is logged once saved.
    """
    address = fake.email()
    full_name = fake.name()
    name_parts = utils.impute_names(full_name)
    registered_at = fake.date_time()

    user = UserFactory(
        username=address,
        fullname=full_name,
        is_registered=True,
        is_claimed=True,
        date_registered=registered_at,
        emails=[address],
        **name_parts
    )
    user.set_password('faker123')
    user.save()

    logger.info('Created user: {0} <{1}>'.format(user.fullname, user.username))
    return user
def create_fake_user():
    """Build a registered, claimed user from generated data and return it.

    Username and sole e-mail are the same generated address; the generated
    full name is split with ``impute_names`` and the parts are forwarded to
    ``UserFactory``. The password is set to ``faker123`` before saving.
    """
    generated_email = fake.email()
    generated_name = fake.name()
    imputed = impute_names(generated_name)

    new_user = UserFactory(
        username=generated_email,
        fullname=generated_name,
        is_registered=True,
        is_claimed=True,
        date_registered=fake.date_time(),
        emails=[generated_email],
        **imputed
    )
    new_user.set_password('faker123')
    new_user.save()
    return new_user
def create_fake_user():
    """Create and save a generated user that carries a verification key.

    The user is built by ``UserFactory`` from a generated e-mail (used as
    username and only e-mail), a generated full name plus its imputed name
    parts, a 15-character random verification key and a generated
    registration date. The password is ``faker123``.
    """
    address = fake.email()
    display_name = fake.name()
    name_fields = impute_names(display_name)
    # Computed in the same order the keyword arguments are listed below.
    key = security.random_string(15)
    registered_on = fake.date_time()

    account = UserFactory(username=address,
                          fullname=display_name,
                          is_registered=True,
                          is_claimed=True,
                          verification_key=key,
                          date_registered=registered_on,
                          emails=[address],
                          **name_fields)
    account.set_password('faker123')
    account.save()
    return account
def create_fake_user():
    """Return a freshly saved user made from generated data.

    A generated e-mail serves as username and e-mail list, a generated name
    as full name; ``impute_names`` supplies the remaining name keyword
    arguments for ``UserFactory``. The user is marked registered and
    claimed and receives the password ``faker123``.
    """
    mailbox = fake.email()
    person_name = fake.name()
    extra_fields = impute_names(person_name)
    signup_time = fake.date_time()
    mailboxes = [mailbox]

    fake_user = UserFactory(
        username=mailbox, fullname=person_name,
        is_registered=True, is_claimed=True,
        date_registered=signup_time, emails=mailboxes,
        **extra_fields
    )
    fake_user.set_password('faker123')
    fake_user.save()
    return fake_user
def create_fake_user():
    """Generate one user, save it and hand it back.

    ``UserFactory`` receives a generated e-mail (as username and e-mail
    list), a generated full name, the name parts from ``impute_names``, a
    random 15-character verification key and a generated registration
    date. The password is set to ``faker123`` before the save.
    """
    fake_email = fake.email()
    fake_name = fake.name()
    parsed_name = impute_names(fake_name)

    result = UserFactory(
        username=fake_email,
        fullname=fake_name,
        is_registered=True,
        is_claimed=True,
        # Key first, date second: same evaluation order as the argument list.
        verification_key=security.random_string(15),
        date_registered=fake.date_time(),
        emails=[fake_email],
        **parsed_name
    )
    result.set_password('faker123')
    result.save()
    return result
def get_users_from_csv(filename):
    """Collect activity summaries for the users listed in a workshop CSV.

    Each CSV row is matched to a user by e-mail address, then by full name,
    then by imputed family name. Rows without a match are skipped.

    :param filename: path of a CSV file with ``Email Address``, ``Name``
        and ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    matches = set()
    # NOTE(review): the 'U' open flag was removed in Python 3.11; kept as-is
    # because this code targets the interpreter the file was written for.
    with open(filename, 'rU') as csvfile:
        for record in csv.DictReader(csvfile):
            username = record['Email Address']
            full_name = record['Name']
            family_name = impute_names(full_name)['family']

            # NOTE(review): the first field is read as the day and the
            # second as the month -- confirm against the CSV's date format.
            date_fields = record['Workshop_Date'].split("/")
            day = int(date_fields[0])
            month = int(date_fields[1])
            year = int('20'+date_fields[2])
            date = datetime.datetime(year, month, day)

            found_user = find_user_by_email(username)
            if not found_user:
                found_user = find_user_by_fullname(full_name)
            if not found_user:
                found_user = find_user_by_lastname(family_name)
            if not found_user:
                continue

            log_count = count_user_logs(found_user, Q('date', 'gte', date))
            node_count = count_user_nodes(found_user, Q('date_created', 'gte', date))
            last_log = user_last_log(found_user)
            matches.add((
                date, found_user.fullname, found_user.username,
                found_user._id, log_count, node_count, last_log,
            ))
    return matches
def get_users_from_csv(filename):
    """Look up the users named in a workshop CSV and summarise their activity.

    A row is matched by e-mail first; only if that fails and the decoded
    name is non-empty are the full-name and parsed-names lookups tried.

    :param filename: path of a CSV file with ``Email Address``, ``Name``
        and ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples, one per matched row
    """
    matched = set()
    with open(filename, 'rU') as csvfile:
        for record in csv.DictReader(csvfile):
            email = record['Email Address']
            # Undecodable bytes in the name are dropped, not raised.
            full_name = record['Name'].decode('utf-8', 'ignore')
            names = impute_names(full_name)
            date = parse_date(record['Workshop_Date'])

            found_user = find_user_by_email(email)
            if not found_user and full_name:
                found_user = find_user_by_fullname(full_name)
                if not found_user:
                    found_user = find_user_by_names(names)
            if not found_user:
                continue

            log_count = count_user_logs(found_user, Q('date', 'gte', date))
            node_count = count_user_nodes(found_user, Q('date_created', 'gte', date))
            last_log = user_last_log(found_user)
            matched.add((
                date, found_user.fullname, found_user.username,
                found_user._id, log_count, node_count, last_log,
            ))
    return matched
def email_name(user):
    """Send ``user`` the citation-verification e-mail and log the outcome.

    The message is ``email_template`` formatted with the user's full name
    plus the parts returned by ``impute_names``, encoded as UTF-8, and sent
    to ``user.username``.
    """
    full_name = user.fullname
    logging.debug('Emailing user {0}'.format(full_name))

    template_values = {'fullname': full_name}
    template_values.update(impute_names(full_name))
    body = email_template.format(**template_values).encode('utf-8')

    sent = send_email(
        from_addr='*****@*****.**',
        to_addr=user.username,
        subject='Open Science Framework: Verify your citation information',
        message=body,
        mimetype='plain',
    )

    outcome = (
        'Emailing user {0}: Success' if sent else 'Emailing user {0}: Failure'
    )
    logging.debug(outcome.format(user.fullname))
def process_name(node, user):
    """Return the CSL name parts for ``user`` as a dict.

    If the user has both a family and a given name, the stored parts are
    returned directly. Otherwise a name is chosen (the full name for
    registered or disabled users, else the name on the unclaimed record
    for ``node``) and split with ``utils.impute_names``.

    :return: dict with ``family_name``, ``suffix``, ``given_name`` and
        ``middle_names`` keys
    """
    has_explicit_names = user.family_name and user.given_name
    if has_explicit_names:
        return dict(
            family_name=user.family_name,
            suffix=user.suffix,
            given_name=user.given_name,
            middle_names=user.middle_names,
        )

    # The user did not fill in family and given name: pick a name to parse.
    if user.is_registered or user.is_disabled:
        source_name = user.fullname
    else:
        source_name = user.get_unclaimed_record(node._id)['name']

    parts = utils.impute_names(source_name)
    return dict(
        family_name=parts['family'],
        suffix=parts['suffix'],
        given_name=parts['given'],
        middle_names=parts['middle'],
    )
def get_users_from_csv(filename):
    """Return activity tuples for every CSV row that maps to a known user.

    Lookup order per row: e-mail address; then, only when the decoded name
    is non-empty, full name followed by the parsed name parts.

    :param filename: path of a CSV file with ``Email Address``, ``Name``
        and ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    summaries = set()
    with open(filename, 'rU') as csvfile:
        reader = csv.DictReader(csvfile)
        for line in reader:
            email = line['Email Address']
            full_name = line['Name'].decode('utf-8', 'ignore')
            names = impute_names(full_name)
            date = parse_date(line['Workshop_Date'])

            found_user = find_user_by_email(email)
            needs_name_lookup = not found_user and full_name
            if needs_name_lookup:
                found_user = (find_user_by_fullname(full_name)
                              or find_user_by_names(names))

            if found_user:
                log_count = count_user_logs(found_user, Q('date', 'gte', date))
                node_count = count_user_nodes(found_user, Q('date_created', 'gte', date))
                last_log = user_last_log(found_user)
                summary = (date, found_user.fullname, found_user.username,
                           found_user._id, log_count, node_count, last_log)
                summaries.add(summary)
    return summaries
def get_users_from_csv(filename):
    """Summarise OSF activity for the attendees listed in a workshop CSV.

    A row contributes a tuple only when a user is found by e-mail, full
    name or imputed family name (tried in that order).

    :param filename: path of a CSV file with ``Email Address``, ``Name``
        and ``Workshop_Date`` columns
    :return: set of ``(date, fullname, username, _id, log_count,
        node_count, last_log)`` tuples
    """
    collected = set()
    # NOTE(review): 'rU' is rejected by Python 3.11+; left unchanged here.
    with open(filename, 'rU') as csvfile:
        reader = csv.DictReader(csvfile)
        for entry in reader:
            username = entry['Email Address']
            full_name = entry['Name']
            family_name = impute_names(full_name)['family']

            # NOTE(review): pieces are read as day/month/two-digit year --
            # confirm this matches the CSV's date format.
            pieces = entry['Workshop_Date'].split("/")
            day, month = int(pieces[0]), int(pieces[1])
            year = int('20' + pieces[2])
            date = datetime.datetime(year, month, day)

            found_user = (
                find_user_by_email(username)
                or find_user_by_fullname(full_name)
                or find_user_by_lastname(family_name)
            )
            if found_user:
                log_count = count_user_logs(found_user, Q('date', 'gte', date))
                node_count = count_user_nodes(found_user, Q('date_created', 'gte', date))
                last_log = user_last_log(found_user)
                summary = (date, found_user.fullname, found_user.username,
                           found_user._id, log_count, node_count, last_log)
                collected.add(summary)
    return collected
def impute_names(**kwargs):
    """Return ``auth_utils.impute_names`` applied to the requested name.

    The name is read from ``request.args['name']`` and defaults to an empty
    string when absent. ``kwargs`` is accepted but not used.
    """
    return auth_utils.impute_names(request.args.get('name', ''))
"""
Impute name parts for all existing users.
"""

# Imported under an alias: the entry point below is itself named
# ``impute_names`` and would otherwise replace the parser it has to call,
# turning ``impute_names(user.fullname)`` into a failing self-call.
from framework.auth.utils import impute_names as parse_name_parts
from website.app import init_app
from website import models

app = init_app('website.settings', set_backends=True, routes=True)


def impute_names():
    """Set unset name-part attributes on every user from their full name.

    For each user from ``models.User.find()``, the full name is parsed and
    every parsed field whose attribute is missing or None on the user is
    set to the parsed value; the user is then saved.
    """
    for user in models.User.find():
        parsed = parse_name_parts(user.fullname)
        # NOTE(review): this assumes the parser's keys are the user's
        # attribute names -- confirm against the parser's return value.
        for field, value in parsed.items():
            if getattr(user, field, None) is None:
                setattr(user, field, value)
        user.save()


if __name__ == '__main__':
    impute_names()
def impute_names(**kwargs):
    """Parse the ``name`` value found in ``request.args`` into name parts.

    A missing ``name`` is treated as an empty string; the result of
    ``auth_utils.impute_names`` is returned unchanged. ``kwargs`` is
    ignored.
    """
    requested_name = request.args.get("name", "")
    name_parts = auth_utils.impute_names(requested_name)
    return name_parts