Python parse_username Examples, wikimetrics.utils.parse_username Python Examples

Example #1

0

Show file

File: cohort_upload.py Project: philiptzou/analytics-wikimetrics

def parse_records(unparsed, default_project):
    """
    Parses records read from a csv file or coming from the upload box

    Each non-empty record is interpreted as
    ``[username part, ..., username part, project]``; a record with a single
    field is treated as a bare username that gets the default project.

    Parameters
        unparsed        : records in array form, as read from a csv
        default_project : the default project to attribute to records without one

    Returns
        a list of the parsed records, each in this form:
            {'username': 'parsed user name', 'project': 'as specified or default'}
    """
    records = []
    for r in unparsed:
        # skip missing or empty rows
        if r is None or len(r) == 0:
            continue

        # NOTE: the reason for the crazy -1 and comma joins
        # is that some users can have commas in their name
        # NOTE: This makes it impossible to add fields to the csv in the future,
        # so maybe require the project to be the first field
        # and the username to be the last or maybe change to a tsv format
        if len(r) > 1:
            # re-join with the comma that the csv split removed, so a username
            # containing commas is restored to its original text
            username = ",".join([str(p) for p in r[:-1]])
            project = r[-1]
            # only byte strings need decoding; text (or a missing value)
            # is used as-is instead of raising
            if isinstance(project, bytes):
                project = project.decode('utf8')
            project = project or default_project
        else:
            username = r[0]
            project = default_project

        # records without a username are dropped
        if username is not None and len(username):
            records.append({
                'username'  : parse_username(username),
                'project'   : project,
            })
    return records

Example #2

0

Show file

File: test_validate_cohort.py Project: wikimedia/analytics-wikimetrics

    def create_users_from_file(self, filename):
        """
        Adds a bunch of users to mediawiki user table from a file
        with usernames.

        In order to test encoding make sure the bindings of the testing and production
        databases match, we try to replicate as accurate as possible the structure
        of mediawiki db in our testing db but that is ongoing work that needs to be
        maintained.

        Parameters:
            filename : Name of a file that contains a cohort with user names,
                       one per line; it is looked up under
                       <self.test_report_path>/static/public
        Return:
            names: Array with the names of the users created, in file order,
                   each passed through parse_username
        """

        # build the path to the cohort file
        test_cohort_file = os.sep.join((self.test_report_path, 'static',
                                        'public', filename))

        # format names according to our convention; the with-block guarantees
        # the file handle is closed even if parsing a line fails
        with open(test_cohort_file, 'r') as f:
            names = [parse_username(line.strip()) for line in f]

        # insert one row per user in a single statement
        self.mwSession.bind.engine.execute(
            MediawikiUser.__table__.insert(), [
                {
                    'user_name': '{0}'.format(n),
                    'user_registration': 20130101000000,
                    'user_email_token_expires': 20200101000000
                }
                for n in names
            ]
        )
        self.mwSession.commit()

        return names

Example #3

0

Show file

    def create_users_from_file(self, filename):
        """
        Adds a bunch of users to mediawiki user table from a file
        with usernames.

        In order to test encoding make sure the bindings of the testing and production
        databases match, we try to replicate as accurate as possible the structure
        of mediawiki db in our testing db but that is ongoing work that needs to be
        maintained.

        Parameters:
            filename : Name of a file that contains a cohort with user names,
                       one per line; it is looked up under
                       <self.test_report_path>/static/public
        Return:
            names: Array with the names of the users created, in file order,
                   each passed through parse_username
        """

        # build the path to the cohort file
        test_cohort_file = os.sep.join(
            (self.test_report_path, 'static', 'public', filename))
        names = []

        # format names according to our convention; using a context manager
        # so the file is always closed once it has been read
        with open(test_cohort_file, 'r') as f:
            for line in f:
                name = parse_username(line.strip())
                names.append(name)

        # insert one row per user in a single statement
        self.mwSession.bind.engine.execute(
            MediawikiUser.__table__.insert(),
            [{
                'user_name': '{0}'.format(n),
                'user_registration': 20130101000000,
                'user_email_token_expires': 20200101000000
            } for n in names])
        self.mwSession.commit()

        return names

Example #4

0

Show file

File: cohort_upload.py Project: OrenBochman/analytics-wikimetrics

def format_records(csv_lines, default_project):
    """
    Turns csv lines (from an uploaded file or the upload box) into records.
    For example "dan,en" yields {'raw_id_or_name': 'dan', 'project': 'en'}, and
    "1, en" yields {'raw_id_or_name': '1', 'project': 'en'}.
    Input is expected to be bytes (str), not unicode.

    Parameters
        csv_lines       : collection of strings, each with csv format
        default_project : project used when a line does not specify one

    Returns
        a list of dictionaries, one per usable line, shaped like:
        {'raw_id_or_name':'parsed user name or id', 'project':'as specified or default'}
    """
    formatted = []
    for row in csv.reader(csv_lines):
        # nothing to do for missing or empty rows
        if row is None or len(row) == 0:
            continue

        if len(row) == 1:
            # a lone field is the user name or id; no project was given
            name_part = row[0]
            project = default_project
        else:
            # The last field is the project. Everything before it is glued
            # back together with commas because user names may themselves
            # contain commas. The downside is that the csv cannot grow new
            # fields later; putting the project first, or switching to tsv,
            # would avoid that.
            name_part = ",".join(str(piece) for piece in row[:-1])
            project = row[-1].strip() or default_project

        # rows that end up without a name or id are dropped
        if name_part is None or not len(name_part):
            continue

        formatted.append({
            'raw_id_or_name': parse_username(name_part),
            'project': project,
        })
    return formatted

Example #5

0

Show file

def format_records(csv_lines, default_project):
    """
    Builds record dictionaries out of csv-formatted lines, whether they were
    read from a file or typed into the upload box.
    "dan,en" gives {'raw_id_or_name': 'dan', 'project': 'en'} and
    "1, en" gives {'raw_id_or_name': '1', 'project': 'en'}.
    The lines are assumed to be bytes (str) rather than unicode.

    Parameters
        csv_lines       : collection of strings, each with csv format
        default_project : fallback project for lines that do not name one

    Returns
        a list with one entry per line that carries a name or id:
        {'raw_id_or_name':'parsed user name or id', 'project':'as specified or default'}
    """
    results = []
    for fields in csv.reader(csv_lines):
        if not fields:
            # blank line: no fields at all
            continue

        if len(fields) > 1:
            # User names can contain commas, so every field except the last
            # is re-joined with commas and only the final one is read as the
            # project. This prevents adding more csv columns in the future
            # (project-first ordering or a tsv format would lift that limit).
            identifier = ",".join(map(str, fields[:-1]))
            project = fields[-1].strip() or default_project
        else:
            identifier = fields[0]
            project = default_project

        if identifier:
            entry = {}
            entry['raw_id_or_name'] = parse_username(identifier)
            entry['project'] = project
            results.append(entry)
    return results