def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
     """ctor with elasticsearch uri: elasticsearch://host:port

     :param uri: storage uri; it is handed to ``Storage.__init__`` and its
         leading ``elasticsearch`` scheme is swapped for ``http`` to build
         the endpoint given to the ``Elasticsearch`` client
     :param dbname: stored as ``self.dbname``
     :param collname: stored as ``self.collname``
     :param chunk_size: stored as ``self.chunk_size``
     """
     Storage.__init__(self, uri)
     scheme = 'elasticsearch'
     # Rewrite the leading scheme only: a blanket str.replace would also
     # corrupt a host name or path that contains the word "elasticsearch"
     # (e.g. elasticsearch://elasticsearch-node:9200).
     if uri.startswith(scheme):
         endpoint = 'http' + uri[len(scheme):]
     else:
         endpoint = uri
     self.client = Elasticsearch([endpoint])
     self.dbname = dbname
     self.collname = collname
     # keep the requested chunk size instead of silently dropping the argument
     self.chunk_size = chunk_size
Exemple #2
0
 def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
     """ctor with elasticsearch uri: elasticsearch://host:port

     :param uri: storage uri, passed to ``Storage.__init__``; the client
         endpoint is the same uri with its leading ``elasticsearch`` scheme
         replaced by ``http``
     :param dbname: stored as ``self.dbname``
     :param collname: stored as ``self.collname``
     :param chunk_size: stored as ``self.chunk_size``
     """
     Storage.__init__(self, uri)
     prefix = 'elasticsearch'
     # Only the scheme at the start of the uri is translated; replacing
     # every occurrence would also alter hosts/paths containing that word.
     endpoint = uri
     if uri.startswith(prefix):
         endpoint = 'http' + uri[len(prefix):]
     self.client = Elasticsearch([endpoint])
     self.dbname = dbname
     self.collname = collname
     # the argument was previously accepted but never recorded
     self.chunk_size = chunk_size
Exemple #3
0
    def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
        """ctor with mongo uri: mongodb://host:port

        :param uri: mongo uri, passed both to ``Storage.__init__`` and to
            ``MongoClient``
        :param dbname: name of the database looked up on the client
        :param collname: name of the main collection inside that database
        :param chunk_size: stored as ``self.chunk_size``

        Two optional JSON files are read when the corresponding environment
        variable is set: ``WMARCHIVE_ERROR_CODES`` (into ``self.codes``) and
        ``WMARCHIVE_PERF_METRICS`` (into ``self.metrics``). Errors from
        opening or parsing those files propagate to the caller.
        """
        Storage.__init__(self, uri)
        self.client = MongoClient(uri, w=1)
        self.mdb = self.client[dbname]
        self.mdb.add_son_manipulator(WMASONManipulator())
        self.collname = collname
        self.coll = self.mdb[collname]
        self.jobs = self.mdb['jobs'] # separate collection for job results
        self.log(self.coll)
        self.chunk_size = chunk_size

        # read error codes; stays an empty dict when no file is configured
        self.codes = {} # dict of numeric codes
        fname = os.environ.get('WMARCHIVE_ERROR_CODES', '')
        if fname:
            with open(fname, 'r') as exit_codes_file:
                self.codes = json.load(exit_codes_file)

        # read performance metrics; give the attribute a default so it
        # always exists, mirroring self.codes, instead of being undefined
        # when WMARCHIVE_PERF_METRICS is not set
        self.metrics = {}
        fname = os.environ.get('WMARCHIVE_PERF_METRICS', '')
        if fname:
            with open(fname, 'r') as metrics_file:
                self.metrics = json.load(metrics_file)

        # printout pymongo version
        print("### pymongo.version %s" % pymongo.version)
Exemple #4
0
 def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
     """Build the storage object on top of a mongo uri: mongodb://host:port

     The uri initialises the ``Storage`` base and a ``MongoClient``; the
     database ``dbname`` and its collections ``collname`` and ``jobs`` are
     then exposed as attributes.
     """
     Storage.__init__(self, uri)
     # connection and database handle
     client = MongoClient(uri, w=1)
     database = client[dbname]
     self.client = client
     self.mdb = database
     database.add_son_manipulator(WMASONManipulator())
     # main collection, plus a dedicated one holding job results
     self.collname = collname
     collection = database[collname]
     self.coll = collection
     self.jobs = database['jobs']
     self.log(collection)
     self.chunk_size = chunk_size
Exemple #5
0
 def __init__(self, uri):
     """ctor with avro uri: avroio:/path/schema.avsc

     After ``Storage.__init__`` runs, ``self.uri`` is used as the local
     path of the avro schema file (presumably the uri without its scheme
     prefix -- confirm against ``Storage``).

     :param uri: avro storage uri
     :raises Exception: when no file exists at the schema path
     """
     Storage.__init__(self, uri)
     schema = self.uri
     if not os.path.exists(schema):
         raise Exception("No avro schema file found in provided uri: %s" % uri)
     # directory part of the schema path
     self.hdir = self.uri.rsplit("/", 1)[0]
     if not os.path.exists(self.hdir):
         os.makedirs(self.hdir)
     # read through a context manager so the file handle is closed right
     # away instead of being left for the garbage collector
     with open(schema) as schema_file:
         schema_doc = schema_file.read()
     # keep both the parsed avro schema and its plain JSON representation
     self.schema = avro.schema.parse(schema_doc)
     self.schema_json = json.loads(schema_doc)
Exemple #6
0
 def __init__(self, uri, compress=True):
     """Initialise the storage from an hdfs uri: hdfsio:/path/schema.avsc

     ``self.uri`` (as set by ``Storage.__init__``) is used as the HDFS path
     of the avro schema; the schema is loaded and parsed, and the part of
     the path before the last '/' is remembered as ``self.hdir``.
     Raises ``Exception`` when the schema listing is empty or when
     ``self.hdir`` is not an HDFS directory.
     """
     Storage.__init__(self, uri)
     schema_path = self.uri
     listing = hdfs.ls(schema_path)
     if not listing:
         raise Exception("No avro schema file found in provided uri: %s" % uri)
     # everything before the last '/' of the schema path
     parent_dir = schema_path.rsplit('/', 1)[0]
     self.hdir = parent_dir
     if not hdfs.path.isdir(parent_dir):
         raise Exception('HDFS path %s does not exists' % parent_dir)
     self.schema = avro.schema.parse(hdfs.load(schema_path))
     self.compress = compress
Exemple #7
0
 def __init__(self, uri, compress=True):
     """Create the object for an hdfs uri: hdfsio:/path/schema.avsc

     Checks via ``hdfs.ls`` that the schema path (``self.uri``) yields a
     non-empty listing and that its leading directory part is an HDFS
     directory, then loads and parses the avro schema. Either failed check
     raises ``Exception``.
     """
     Storage.__init__(self, uri)
     avsc = self.uri
     if not hdfs.ls(avsc):
         message = "No avro schema file found in provided uri: %s" % uri
         raise Exception(message)
     # directory portion of the schema path
     self.hdir = avsc.rsplit('/', 1)[0]
     hdir_is_dir = hdfs.path.isdir(self.hdir)
     if not hdir_is_dir:
         raise Exception('HDFS path %s does not exists' % self.hdir)
     raw_schema = hdfs.load(avsc)
     self.schema = avro.schema.parse(raw_schema)
     self.compress = compress
Exemple #8
0
 def __init__(self, uri):
     """ctor with avro uri: avroio:/path/schema.avsc

     ``self.uri``, as left by ``Storage.__init__``, is treated as the local
     path of the avro schema file (presumably the uri stripped of its
     scheme -- confirm against ``Storage``).

     :param uri: avro storage uri
     :raises Exception: when the schema path does not exist
     """
     Storage.__init__(self, uri)
     schema = self.uri
     if not os.path.exists(schema):
         raise Exception("No avro schema file found in provided uri: %s" %
                         uri)
     # directory part of the schema path
     self.hdir = self.uri.rsplit('/', 1)[0]
     if not os.path.exists(self.hdir):
         os.makedirs(self.hdir)
     # the with-block guarantees the schema file is closed after reading;
     # the bare open(...).read() left the handle open until collected
     with open(schema) as schema_file:
         schema_doc = schema_file.read()
     # parsed avro schema plus the same document as plain JSON
     self.schema = avro.schema.parse(schema_doc)
     self.schema_json = json.loads(schema_doc)
Exemple #9
0
 def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
     """ctor with mongo uri: mongodb://host:port

     :param uri: mongo uri, passed to ``Storage.__init__`` and ``MongoClient``
     :param dbname: name of the database looked up on the client
     :param collname: name of the main collection inside that database
     :param chunk_size: stored as ``self.chunk_size``

     Index creation on the main collection is best effort: a failure is
     reported through ``self.log`` and does not abort construction.
     """
     Storage.__init__(self, uri)
     self.client = MongoClient(uri, w=1)
     self.mdb = self.client[dbname]
     self.mdb.add_son_manipulator(WMASONManipulator())
     self.collname = collname
     self.coll = self.mdb[collname]
     self.jobs = self.mdb['jobs'] # separate collection for job results
     self.acol = self.mdb['acol'] # separate collection for aggregated results
     self.log(self.coll)
     self.chunk_size = chunk_size
     try:
         # unique index on wmaid, compound index on (wmats, stype)
         self.coll.ensure_index([('wmaid', DESCENDING)], unique=True)
         self.coll.ensure_index([('wmats', DESCENDING), ('stype', DESCENDING)])
     except Exception as exc:
         # Still best effort, but no longer a bare except: KeyboardInterrupt
         # and SystemExit propagate, and the failure reason is recorded
         # instead of being discarded.
         self.log('WARNING: unable to ensure indexes on %s: %s' \
                 % (collname, str(exc)))
Exemple #10
0
    def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
        """ctor with mongo uri: mongodb://host:port

        :param uri: mongo uri, passed both to ``Storage.__init__`` and to
            ``MongoClient``
        :param dbname: name of the database looked up on the client
        :param collname: name of the main collection inside that database
        :param chunk_size: stored as ``self.chunk_size``

        ``self.codes`` and ``self.metrics`` are filled from the JSON files
        named by the ``WMARCHIVE_ERROR_CODES`` and ``WMARCHIVE_PERF_METRICS``
        environment variables; both default to an empty dict when the
        variable is unset or empty. Errors from opening or parsing a
        configured file propagate to the caller.
        """
        Storage.__init__(self, uri)
        self.client = MongoClient(uri, w=1)
        self.mdb = self.client[dbname]
        self.mdb.add_son_manipulator(WMASONManipulator())
        self.collname = collname
        self.coll = self.mdb[collname]
        self.jobs = self.mdb['jobs'] # separate collection for job results
        self.log(self.coll)
        self.chunk_size = chunk_size

        # read error codes
        self.codes = {} # dict of numeric codes
        fname = os.environ.get('WMARCHIVE_ERROR_CODES', '')
        if fname:
            with open(fname, 'r') as exit_codes_file:
                self.codes = json.load(exit_codes_file)

        # read performance metrics; the default keeps the attribute defined
        # even when no metrics file is configured, the same way self.codes
        # is handled above
        self.metrics = {}
        fname = os.environ.get('WMARCHIVE_PERF_METRICS', '')
        if fname:
            with open(fname, 'r') as metrics_file:
                self.metrics = json.load(metrics_file)
Exemple #11
0
 def __init__(self, uri):
     """ctor with fileio uri: fileio:/path

     Makes sure the path held in ``self.uri`` (set by ``Storage.__init__``)
     exists, creating the directory tree when needed.

     :param uri: fileio storage uri
     :raises OSError: when the directory cannot be created and the path
         still does not exist afterwards
     """
     Storage.__init__(self, uri)
     # Create first, check afterwards: testing for existence before calling
     # makedirs is racy when another process creates the same directory in
     # between the check and the call.
     try:
         os.makedirs(self.uri)
     except OSError:
         if not os.path.exists(self.uri):
             raise
Exemple #12
0
 def __init__(self, uri):
     """ctor with fileio uri: fileio:/path

     Ensures that the path stored in ``self.uri`` by ``Storage.__init__``
     exists, creating missing directories.

     :param uri: fileio storage uri
     :raises OSError: when creation fails and the path is still absent
     """
     Storage.__init__(self, uri)
     # Attempt the creation unconditionally and tolerate "already there";
     # an exists()-then-makedirs() sequence can fail when a concurrent
     # process creates the directory between the two calls.
     try:
         os.makedirs(self.uri)
     except OSError:
         if not os.path.exists(self.uri):
             raise