def ingest(cls, package="osp.fields", path="data/fields.csv"):

        """
        Load fields and subfields from a CSV file.

        For every row, the "Primary Field" and "Secondary Field" values are
        cleaned and the "ABBRV" value is parsed (and filtered, when the
        parse result is truthy). A row with a truthy "Alpha Category" value
        creates a `Field` named after the primary field. A `Subfield` is
        then created whenever a `Field` with the primary name is found.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # Sanitize the field names.
            primary = clean_field_name(row["Primary Field"])
            secondary = clean_field_name(row["Secondary Field"])

            # Parse the abbreviations; only a truthy result gets filtered.
            parsed = parse_abbrs(row["ABBRV"])
            abbrs = filter_abbrs(parsed) if parsed else parsed

            # A truthy "Alpha Category" marks a parent field row.
            if row["Alpha Category"]:
                Field.create(name=primary)

            # Look up the parent field by its name.
            matches = Field.select().where(Field.name == primary)
            parent = matches.first()

            # Without a parent field there is nothing to attach to.
            if not parent:
                continue

            Subfield.create(name=secondary, abbreviations=abbrs, field=parent)
    def ingest_world(
        cls,
        package='osp.institutions',
        path='data/world.csv',
    ):

        """
        Insert non-US universities from a CSV file.

        Rows whose 'country' value equals 'US' are skipped. For the rest,
        the URL, name and country are stripped of surrounding whitespace,
        a domain is derived from the URL, and a row is created with
        `state` set to None. Rows that raise `IntegrityError` on insert
        are silently skipped.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # US rows are not handled here.
            if row['country'] == 'US':
                continue

            # Normalize the URL and derive its domain.
            link = row['url'].strip()
            host = parse_domain(link)

            # Clean the remaining fields.
            title = row['name'].strip()
            nation = row['country'].strip()

            attrs = {
                'name': title,
                'url': link,
                'domain': host,
                'state': None,
                'country': nation,
            }

            try:
                cls.create(**attrs)

            # Best-effort insert: rows violating a constraint are dropped.
            except IntegrityError:
                continue
Exemple #3
0
    def ingest(cls, package='osp.fields', path='data/fields.csv'):
        """
        Load fields and subfields from a CSV file.

        For every row, the 'Primary Field' and 'Secondary Field' values are
        cleaned and the 'ABBRV' value is parsed (and filtered, when the
        parse result is truthy). A row with a truthy 'Alpha Category' value
        creates a `Field` named after the primary field. A `Subfield` is
        then created whenever a `Field` with the primary name is found.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # Sanitize the field names.
            primary = clean_field_name(row['Primary Field'])
            secondary = clean_field_name(row['Secondary Field'])

            # Parse the abbreviations; only a truthy result gets filtered.
            parsed = parse_abbrs(row['ABBRV'])
            abbrs = filter_abbrs(parsed) if parsed else parsed

            # A truthy 'Alpha Category' marks a parent field row.
            if row['Alpha Category']:
                Field.create(name=primary)

            # Look up the parent field by its name.
            matches = Field.select().where(Field.name == primary)
            parent = matches.first()

            # Without a parent field there is nothing to attach to.
            if not parent:
                continue

            Subfield.create(
                name=secondary,
                abbreviations=abbrs,
                field=parent,
            )
    def ingest_usa(
        cls,
        package='osp.institutions',
        path='data/usa.csv',
    ):

        """
        Insert US universities from a CSV file.

        Only rows whose 'e_country' value equals 'USA' are used. The URL,
        name and state are stripped of surrounding whitespace, a domain is
        derived from the URL, and a row is created with `country` fixed to
        'US'. Rows that raise `IntegrityError` on insert are silently
        skipped.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for row in read_csv(package, path):

            # Anything that is not tagged 'USA' is ignored.
            if row['e_country'] != 'USA':
                continue

            # Normalize the URL and derive its domain.
            link = row['web_url'].strip()
            host = parse_domain(link)

            # Clean the remaining fields.
            title = row['biz_name'].strip()
            region = row['e_state'].strip()

            attrs = {
                'name': title,
                'url': link,
                'domain': host,
                'state': region,
                'country': 'US',
            }

            try:
                cls.create(**attrs)

            # Best-effort insert: rows violating a constraint are dropped.
            except IntegrityError:
                continue
Exemple #5
0
    def insert_institutions(cls):

        """
        Bulk-insert institution rows from the two bundled CSV files.

        Each CSV row is stored whole under the "metadata" key. The files
        are processed in order, and each file is inserted inside its own
        database transaction.
        """

        sources = (
            "data/institutions.csv",
            "data/institutions_intl.csv",
        )

        for source in sources:

            # Wrap every CSV row as the value of the "metadata" column.
            payload = [
                {"metadata": record}
                for record in read_csv("osp.institutions", source)
            ]

            with cls._meta.database.transaction():
                cls.insert_many(payload).execute()
Exemple #6
0
    def insert_institutions(cls):

        """
        Bulk-insert institution rows from the bundled CSV file.

        Each CSV row is stored whole under the 'metadata' key, and all
        rows are inserted inside a single database transaction.
        """

        records = read_csv('osp.institutions', 'data/institutions.csv')

        # Wrap every CSV row as the value of the 'metadata' column.
        payload = [{'metadata': record} for record in records]

        with cls._meta.database.transaction():
            cls.insert_many(payload).execute()
Exemple #7
0
    def ingest_world(
        cls,
        package='osp.institutions',
        path='data/world.csv',
    ):
        """
        Insert non-US universities from a CSV file.

        Every row is passed through `strip_csv_row` first. Rows whose
        'country' value equals 'US' are skipped; the others are created
        from their 'name', 'url' and 'country' values. Any exception
        raised while building or creating a row is printed and the loop
        moves on.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for raw in read_csv(package, path):

            row = strip_csv_row(raw)

            # US rows are not handled here.
            if row['country'] == 'US':
                continue

            try:
                # Lookups stay inside the try so a missing column is
                # reported like any other per-row failure.
                attrs = {
                    'name': row['name'],
                    'url': row['url'],
                    'country': row['country'],
                }
                cls.create(**attrs)

            except Exception as error:
                print(error)
Exemple #8
0
    def ingest_usa(
        cls,
        package='osp.institutions',
        path='data/usa.csv',
    ):
        """
        Insert US universities from a CSV file.

        Every row is passed through `strip_csv_row` first. Only rows whose
        'e_country' value equals 'USA' are used; they are created from
        their 'biz_name', 'web_url' and 'e_state' values with `country`
        fixed to 'US'. Any exception raised while building or creating a
        row is printed and the loop moves on.

        Args:
            package (str): First argument handed to `read_csv`.
            path (str): Second argument handed to `read_csv`.
        """

        for raw in read_csv(package, path):

            row = strip_csv_row(raw)

            # Anything that is not tagged 'USA' is ignored.
            if row['e_country'] != 'USA':
                continue

            try:
                # Lookups stay inside the try so a missing column is
                # reported like any other per-row failure.
                attrs = {
                    'name': row['biz_name'],
                    'url': row['web_url'],
                    'state': row['e_state'],
                    'country': 'US',
                }
                cls.create(**attrs)

            except Exception as error:
                print(error)