def handle(self, *args, **options):
        """Index every repository found in the configured MySQL database.

        Connection settings are read from the ``dbuser``, ``dbpass``,
        ``dbhost``, ``database`` and ``dbport`` entries of ``options``.
        Repositories without an identifier are reported on stderr and
        skipped; every other repository is handed to
        ``self.import_icaatom_repo``.
        """
        db_url = URL(
            "mysql",
            username=options["dbuser"],
            password=options["dbpass"],
            host=options["dbhost"],
            database=options["database"],
            port=options["dbport"],
            query={"charset": "utf8", "use_unicode": 0},
        )
        init_models(create_engine(db_url))
        self.session = models.Session()

        # Every repository is selected for now; narrow this query (for
        # example by filtering on models.Repository.identifier) to index
        # only a subset.
        repos = list(self.session.query(models.Repository).all())
        self.stdout.write("Adding %s repos\n" % len(repos))
        for repo in repos:
            if repo.identifier:
                self.stderr.write("\n\nIndexing repo: %s\n" % repo.identifier)
                self.import_icaatom_repo(repo)
            else:
                self.stderr.write("\n\nCannot index repository with no identifier\n")
Example #2
0
    def __init__(self, database=None, username=None,
                password=None, hostname="localhost", port=None, atomuser=None,
                rowfunc=None, donefunc=None):
        """Open a database session and load the records the import relies on.

        ``database``, ``username``, ``password``, ``hostname`` and ``port``
        describe the MySQL connection. ``atomuser`` is the username of the
        ``models.User`` row stored on ``self.user``. ``rowfunc`` and
        ``donefunc`` are stored on the instance unchanged.

        Every lookup ends in ``.one()``, so a missing or ambiguous row is
        raised by SQLAlchemy rather than handled here.
        """
        db_url = URL(
            "mysql",
            username=username,
            password=password,
            host=hostname,
            database=database,
            port=port,
            query={"charset": "utf8", "use_unicode": 0},
        )
        init_models(create_engine(db_url))
        self.session = models.Session()
        self.donefunc = donefunc
        self.rowfunc = rowfunc
        self.timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

        def term_named(taxonomy_id, name):
            # Fetch the single Term of the given taxonomy whose i18n name
            # matches `name`.
            terms = self.session.query(models.Term).filter(
                models.Term.taxonomy_id == taxonomy_id)
            terms = terms.join(
                models.TermI18N, models.Term.id == models.TermI18N.id)
            return terms.filter(models.TermI18N.name == name).one()

        users = self.session.query(models.User)
        self.user = users.filter(models.User.username == atomuser).one()

        # Default description status and level of detail.
        self.status = term_named(
            keys.TaxonomyKeys.DESCRIPTION_STATUS_ID, "Draft")
        self.detail = term_named(
            keys.TaxonomyKeys.DESCRIPTION_DETAIL_LEVEL_ID, "Partial")

        actors = self.session.query(models.Actor)
        self.actorroot = actors.filter(
            models.Actor.id == keys.ActorKeys.ROOT_ID).one()
        root_terms = self.session.query(models.Term)
        self.termroot = root_terms.filter(
            models.Term.id == keys.TermKeys.ROOT_ID).one()

        # running count of slugs used so far in the import transaction
        self.slugs = {}
        self.ids = {}
    def __init__(self):
        """Read the options, open a database session and build the Solr URL.

        ``get_options()`` supplies ``self.options`` and ``self.args``. The
        ``db*`` options describe the MySQL connection, and the ``solr*``
        options are combined into the JSON update URL kept on
        ``self.solrurl``.
        """
        self.options, self.args = get_options()
        opts = self.options

        db_url = URL(
            "mysql",
            username=opts.dbuser,
            password=opts.dbpass,
            host=opts.dbhost,
            database=opts.database,
            port=opts.dbport,
            query={"charset": "utf8", "use_unicode": 0},
        )
        init_models(create_engine(db_url))
        self.session = models.Session()

        solr_location = (opts.solrhost, opts.solrport, opts.solrcontext)
        self.solrurl = "http://%s:%d/%s/update/json" % solr_location
Example #4
0
    def __init__(self):
        """Initialise importer.

        Parses the command line (exactly one positional argument, the CSV
        file, is accepted), opens a session on the MySQL database described
        by the ``--db*`` options, and loads the owning user, the root actor
        and the default "Draft" / "Partial" terms.

        A wrong number of positional arguments ends the program through
        ``parser.error``. The ``.one()`` lookups are not guarded, so a
        missing or ambiguous user, actor or term is raised by SQLAlchemy.
        """
        parser = OptionParser(usage="usage: %prog [options] <csvfile>",
                              version="%prog 1.0")
        parser.add_option(
            "-f", "--from", action="store", dest="fromrec", type="int",
            default=1, help="Import records from this offset")
        parser.add_option(
            "-t", "--to", action="store", dest="to", type="int",
            default=-1, help="Import records up to this offset")
        parser.add_option(
            "-U", "--dbuser", action="store", dest="dbuser",
            default="qubit", help="Database user")
        parser.add_option(
            "-p", "--dbpass", action="store", dest="dbpass",
            help="Database password")
        parser.add_option(
            "-H", "--dbhost", action="store", dest="dbhost",
            default="localhost", help="Database host name")
        # The help text used to repeat the host description.
        parser.add_option(
            "-P", "--dbport", action="store", dest="dbport",
            help="Database port")
        parser.add_option(
            "-D", "--database", action="store", dest="database",
            default="qubit", help="Database name")
        parser.add_option(
            "-u", "--user", action="store", dest="user",
            default="qubit", help="User to own imported records")
        parser.add_option(
            "-l", "--lang", action="store", dest="lang",
            default="en", help="Language for imported i18n fields")

        self.options, self.args = parser.parse_args()
        # parser.error() prints the usage and exits, so only one of these
        # messages is ever shown.
        if not self.args:
            parser.error("No CSV file provided")
        if len(self.args) > 1:
            parser.error(
                "Expected exactly one CSV file, got %d arguments"
                % len(self.args))

        engine = create_engine(URL(
            "mysql",
            username=self.options.dbuser,
            password=self.options.dbpass,
            host=self.options.dbhost,
            database=self.options.database,
            port=self.options.dbport,
            query=dict(charset="utf8", use_unicode=0),
        ))
        init_models(engine)
        self.session = models.Session()

        def find_term(taxonomy_id, name):
            # Return the single Term of the given taxonomy whose i18n name
            # matches `name`.
            return (
                self.session.query(models.Term)
                .filter(models.Term.taxonomy_id == taxonomy_id)
                .join(models.TermI18N, models.Term.id == models.TermI18N.id)
                .filter(models.TermI18N.name == name)
                .one()
            )

        self.user = self.session.query(models.User).filter(
            models.User.username == self.options.user).one()
        self.parent = self.session.query(models.Actor).filter(
            models.Actor.id == keys.ActorKeys.ROOT_ID).one()

        # Default description status and level of detail.
        self.status = find_term(
            keys.TaxonomyKeys.DESCRIPTION_STATUS_ID, "Draft")
        self.detail = find_term(
            keys.TaxonomyKeys.DESCRIPTION_DETAIL_LEVEL_ID, "Partial")

        # running count of slugs used so far in the import transaction
        self.slugs = {}
Example #5
0
    PATH = "eag"
    #print(repo.identifier)
    #for prop in repo.properties:
    #    for k, v in prop.get_i18n("en").iteritems():
    #        print("%-20s : %s" % (k, phpserialize.loads(v)))
    #for address in repo.contacts:
    #    for k, v in address.get_i18n("en").iteritems():
    #        print "     %-20s : %s" % (k, v)
    #for k, v in repo.get_i18n("en").iteritems():
    #    print "%-20s : %s" % (k, v)
    try:
        country_code, doc = get_doc_base(repo)
        dirpath = os.path.join(PATH, country_code.lower())
        out = etree.tostring(doc, pretty_print=True)
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)

        with open(os.path.join(dirpath, str(repo.id) + ".xml"), "w") as f:
            f.write(out)
    except NoCountryCode, e:
        print(e.message, file=sys.stderr)


# Connect to the local ehri_icaatom database and pass each repository row
# to dump_repo().
engine = create_engine("mysql://*****:*****@localhost/ehri_icaatom?charset=utf8")
init_models(engine)
session = models.Session()

repositories = session.query(models.Repository).all()
for repo in repositories:
    dump_repo(repo)
    def __init__(self):
        """Initialise importer.

        Parses the command line (two positional arguments are required: the
        repository name and the JSON file), opens a session on the MySQL
        database described by the ``--db*`` options, and loads the
        repository, the owning user, the root information object and the
        taxonomy terms used by the import.

        The program exits with usage information when the positional
        arguments are wrong, and with status 1 when the repository or the
        user cannot be found. The remaining ``.one()`` lookups are not
        guarded, so a missing or ambiguous row is raised by SQLAlchemy.
        """
        parser = OptionParser(usage="usage: %prog [options] <repository_name> <jsonfile>",
                              version="%prog 1.0")
        parser.add_option(
            "-f", "--from", action="store", dest="fromrec", type="int",
            default=1, help="Import records from this offset")
        parser.add_option(
            "-t", "--to", action="store", dest="to", type="int",
            default=-1, help="Import records up to this offset")
        parser.add_option(
            "-U", "--dbuser", action="store", dest="dbuser",
            default="qubit", help="Database user")
        parser.add_option(
            "-p", "--dbpass", action="store", dest="dbpass",
            help="Database password")
        parser.add_option(
            "-H", "--dbhost", action="store", dest="dbhost",
            default="localhost", help="Database host name")
        # The help text used to repeat the host description.
        parser.add_option(
            "-P", "--dbport", action="store", dest="dbport",
            help="Database port")
        parser.add_option(
            "-D", "--database", action="store", dest="database",
            default="qubit", help="Database name")
        parser.add_option(
            "-u", "--user", action="store", dest="user",
            default="qubit", help="User to own imported records")
        parser.add_option(
            "-l", "--lang", action="store", dest="lang",
            default="en", help="Language for imported i18n fields")

        self.options, self.args = parser.parse_args()
        if len(self.args) != 2:
            # OptionParser.error() requires a message; calling it without
            # one raised TypeError instead of printing the usage.
            parser.error(
                "Expected exactly two arguments: <repository_name> <jsonfile>")
        self.reponame, self.jsonfile = self.args

        engine = create_engine(URL(
            "mysql",
            username=self.options.dbuser,
            password=self.options.dbpass,
            host=self.options.dbhost,
            database=self.options.database,
            port=self.options.dbport,
            query=dict(charset="utf8", use_unicode=0),
        ))
        init_models(engine)
        self.session = models.Session()

        def find_term(taxonomy_id, name):
            # Return the single Term of the given taxonomy whose i18n name
            # matches `name`.
            return (
                self.session.query(models.Term)
                .filter(models.Term.taxonomy_id == taxonomy_id)
                .join(models.TermI18N, models.Term.id == models.TermI18N.id)
                .filter(models.TermI18N.name == name)
                .one()
            )

        # sys.stderr.write() emits the same text as the former
        # ``print >> sys.stderr`` statement without relying on
        # Python-2-only statement syntax.
        try:
            self.repo = (
                self.session.query(models.Repository)
                .join(models.ActorI18N,
                      models.ActorI18N.id == models.Repository.id)
                .filter(models.ActorI18N.authorized_form_of_name
                        == self.reponame)
                .one()
            )
        except NoResultFound:
            sys.stderr.write(
                "No repository found for name: %s\n" % self.reponame)
            sys.exit(1)
        try:
            self.user = self.session.query(models.User).filter(
                models.User.username == self.options.user).one()
        except NoResultFound:
            sys.stderr.write(
                "No user found for name: %s\n" % self.options.user)
            sys.exit(1)

        self.parent = self.session.query(models.InformationObject).filter(
            models.InformationObject.id
            == keys.InformationObjectKeys.ROOT_ID).one()

        # Taxonomy terms attached to imported records.
        self.status = find_term(
            keys.TaxonomyKeys.DESCRIPTION_STATUS_ID, "Draft")
        self.detail = find_term(
            keys.TaxonomyKeys.DESCRIPTION_DETAIL_LEVEL_ID, "Partial")
        self.pubtype = find_term(
            keys.TaxonomyKeys.STATUS_TYPE_ID, "publication")
        self.pubstatus = find_term(
            keys.TaxonomyKeys.PUBLICATION_STATUS_ID, "published")
        self.lod_fonds = find_term(
            keys.TaxonomyKeys.LEVEL_OF_DESCRIPTION_ID, "Fonds")
        self.lod_coll = find_term(
            keys.TaxonomyKeys.LEVEL_OF_DESCRIPTION_ID, "Collection")
        self.slugs = {}
Example #7
0
    #    for k, v in prop.get_i18n("en").iteritems():
    #        print("%-20s : %s" % (k, phpserialize.loads(v)))
    #for address in repo.contacts:
    #    for k, v in address.get_i18n("en").iteritems():
    #        print "     %-20s : %s" % (k, v)
    #for k, v in repo.get_i18n("en").iteritems():
    #    print "%-20s : %s" % (k, v)
    try:
        country_code, doc = get_doc_base(repo)
        dirpath = os.path.join(PATH, country_code.lower())
        out = etree.tostring(doc, pretty_print=True)
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)
        
        with open(os.path.join(dirpath, str(repo.id) + ".xml"), "w") as f:            
            f.write(out)
    except NoCountryCode, e:
        print(e.message, file = sys.stderr)
        


# Connect to the local ehri_icaatom database and pass each repository row
# to dump_repo().
engine = create_engine(
    "mysql://*****:*****@localhost/ehri_icaatom?charset=utf8")
init_models(engine)
session = models.Session()

all_repos = session.query(models.Repository).all()
for repo in all_repos:
    dump_repo(repo)