def parse_records(unparsed, default_project):
    """
    Parses records read from a csv file or coming from the upload box

    Parameters
        unparsed        : records in array form, as read from a csv
                          (each record is a sequence of fields)
        default_project : the default project to attribute to records
                          without one

    Returns
        the parsed records in this form:
            {'username': 'parsed username', 'project': 'as specified or default'}
        Empty records and records with an empty username are skipped.
    """
    records = []
    for r in unparsed:
        if r is None or len(r) == 0:
            continue

        # NOTE: the last field is taken as the project and everything before
        # it is glued back together with commas, because some users can have
        # commas in their name (the csv split would otherwise break them up).
        # NOTE: This makes it impossible to add fields to the csv in the future,
        # so maybe require the project to be the first field
        # and the username to be the last or maybe change to a tsv format
        if len(r) > 1:
            # Re-join with the same separator the csv was split on, so the
            # original username is reconstructed unchanged.
            username = ",".join([str(p) for p in r[:-1]])
            # An empty project field falls back to the default project.
            project = r[-1].decode('utf8') or default_project
        else:
            username = r[0]
            project = default_project

        if username is not None and len(username):
            records.append({
                'username': parse_username(username),
                'project': project,
            })
    return records
def create_users_from_file(self, filename):
    """
    Adds a bunch of users to mediawiki user table from a file with usernames.

    In order to test encoding make sure the bindings of the testing
    and production databases match, we try to replicate as accurate as
    possible the structure of mediawiki db in our testing db but that is
    ongoing work that needs to be maintained.

    Parameters:
        filename : Name of a file that contains a cohort with user names;
                   it is looked up under
                   <self.test_report_path>/static/public

    Return:
        names: Array with the names of the users created, one per line of
               the file, each stripped and passed through parse_username
               (per the original notes: capitalized to mediawiki convention)
    """
    # locate the cohort file
    test_cohort_file = os.sep.join(
        (self.test_report_path, 'static', 'public', filename)
    )

    # read and format names according to our convention; the context manager
    # guarantees the file handle is closed even if parsing raises
    with open(test_cohort_file, 'r') as f:
        names = [parse_username(line.strip()) for line in f]

    # bulk-insert one row per name into the mediawiki user table
    self.mwSession.bind.engine.execute(
        MediawikiUser.__table__.insert(),
        [
            {
                'user_name': '{0}'.format(n),
                'user_registration': 20130101000000,
                'user_email_token_expires': 20200101000000
            }
            for n in names
        ]
    )
    self.mwSession.commit()
    return names
def create_users_from_file(self, filename):
    """
    Adds a bunch of users to mediawiki user table from a file with usernames.

    In order to test encoding make sure the bindings of the testing
    and production databases match, we try to replicate as accurate as
    possible the structure of mediawiki db in our testing db but that is
    ongoing work that needs to be maintained.

    Parameters:
        filename : Name of a file that contains a cohort with user names;
                   it is looked up under
                   <self.test_report_path>/static/public

    Return:
        names: Array with the names of the users created, one per line of
               the file, each stripped and passed through parse_username
               (per the original notes: capitalized to mediawiki convention)
    """
    # build the path to the cohort file
    path_parts = (self.test_report_path, 'static', 'public', filename)
    test_cohort_file = os.sep.join(path_parts)

    # format names according to our convention; `with` closes the file
    # handle deterministically instead of leaking it
    names = []
    with open(test_cohort_file, 'r') as cohort_file:
        for line in cohort_file:
            names.append(parse_username(line.strip()))

    # one insert row per parsed name
    rows = [{
        'user_name': '{0}'.format(n),
        'user_registration': 20130101000000,
        'user_email_token_expires': 20200101000000
    } for n in names]

    self.mwSession.bind.engine.execute(MediawikiUser.__table__.insert(), rows)
    self.mwSession.commit()
    return names
def format_records(csv_lines, default_project):
    """
    Turns csv-formatted lines (from a file or the upload box) into records.

    For example "dan,en" becomes {'raw_id_or_name': 'dan', 'project': 'en'},
    and "1, en" becomes {'raw_id_or_name': '1', 'project': 'en'}.

    Note this method assumes bytes (str) not unicode types as input

    Parameters
        csv_lines       : collection of strings, each with csv format
        default_project : project assigned to records that do not name one

    Returns
        a list of dictionaries, one per non-empty record, of this form:
            {'raw_id_or_name': 'parsed user name or id',
             'project': 'as specified or default'}
    """
    formatted = []
    for fields in csv.reader(csv_lines):
        # blank lines produce empty rows; nothing to record
        if not fields:
            continue

        if len(fields) > 1:
            # NOTE: the last field is the project; everything before it is
            # joined back with commas because user names may contain commas.
            # NOTE: this prevents adding more csv fields later, so maybe
            # require project first / username last, or switch to tsv.
            name_parts = [str(part) for part in fields[:-1]]
            raw_id_or_name = ",".join(name_parts)
            project = fields[-1].strip() or default_project
        else:
            raw_id_or_name = fields[0]
            project = default_project

        # skip records with an empty name or id
        if not raw_id_or_name:
            continue

        formatted.append({
            'raw_id_or_name': parse_username(raw_id_or_name),
            'project': project,
        })
    return formatted
def format_records(csv_lines, default_project):
    """
    Processes lines of csv text, read from a file or the upload box,
    into a list of record dictionaries.

    i.e. "dan,en" becomes {'raw_id_or_name': 'dan', 'project': 'en'},
    and "1, en" becomes {'raw_id_or_name': '1', 'project': 'en'}.

    Note this method assumes bytes (str) not unicode types as input

    Parameters
        csv_lines       : collection of strings, each with csv format
        default_project : the project to use when a record has none

    Returns
        a list with one element per usable record, each of this form:
            {'raw_id_or_name': 'parsed user name or id',
             'project': 'as specified or default'}
    """
    result = []
    for row in csv.reader(csv_lines):
        if row is None or len(row) == 0:
            continue

        # NOTE: with several fields, the last one is the project and the
        # rest are re-joined with commas, since names can contain commas.
        # NOTE: that makes adding csv fields in the future impossible;
        # consider project-first / username-last, or a tsv format.
        has_project_field = len(row) > 1
        raw_id_or_name = (
            ",".join(map(str, row[:-1])) if has_project_field else row[0]
        )
        project = (
            (row[-1].strip() or default_project)
            if has_project_field
            else default_project
        )

        if raw_id_or_name is None or len(raw_id_or_name) == 0:
            continue

        record = {}
        record['raw_id_or_name'] = parse_username(raw_id_or_name)
        record['project'] = project
        result.append(record)
    return result