def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
    """ctor with elasticsearch uri: elasticsearch://host:port

    :param uri: storage uri; its ``elasticsearch`` scheme is rewritten to
        ``http`` to build the endpoint handed to the Elasticsearch client
    :param dbname: stored on the instance as ``self.dbname``
    :param collname: stored on the instance as ``self.collname``
    :param chunk_size: accepted but not used by this constructor
    """
    Storage.__init__(self, uri)
    # Rewrite only the first occurrence (the scheme). An unbounded replace
    # would also mangle a host name that contains "elasticsearch", e.g.
    # elasticsearch://elasticsearch.example.org:9200.
    endpoint = uri.replace('elasticsearch', 'http', 1)
    self.client = Elasticsearch([endpoint])
    self.dbname = dbname
    self.collname = collname
def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
    """ctor with mongo uri: mongodb://host:port

    :param uri: MongoDB connection uri passed to ``MongoClient``
    :param dbname: name of the database opened on the client
    :param collname: name of the main collection (``self.coll``)
    :param chunk_size: stored on the instance as ``self.chunk_size``

    Optional JSON files are loaded when the corresponding environment
    variable is set to a file name:

    - ``WMARCHIVE_ERROR_CODES`` -> ``self.codes``
    - ``WMARCHIVE_PERF_METRICS`` -> ``self.metrics``

    Both attributes default to an empty dict when the variable is unset.
    """
    Storage.__init__(self, uri)
    self.client = MongoClient(uri, w=1)
    self.mdb = self.client[dbname]
    self.mdb.add_son_manipulator(WMASONManipulator())
    self.collname = collname
    self.coll = self.mdb[collname]
    self.jobs = self.mdb['jobs']  # separate collection for job results
    self.log(self.coll)
    self.chunk_size = chunk_size

    # read error codes
    self.codes = {}  # dict of numeric codes
    fname = os.environ.get('WMARCHIVE_ERROR_CODES', '')
    if fname:
        with open(fname, 'r') as exit_codes_file:
            self.codes = json.load(exit_codes_file)

    # read performance metrics
    # Always define the attribute so later reads cannot raise AttributeError
    # when the environment variable is not set.
    # NOTE(review): empty dict mirrors self.codes -- confirm consumers of
    # self.metrics accept an empty dict.
    self.metrics = {}
    fname = os.environ.get('WMARCHIVE_PERF_METRICS', '')
    if fname:
        with open(fname, 'r') as metrics_file:
            self.metrics = json.load(metrics_file)

    # printout pymongo version
    print("### pymongo.version %s" % pymongo.version)
def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
    """Create a MongoDB-backed storage from a ``mongodb://host:port`` uri.

    :param uri: connection uri passed to ``MongoClient``
    :param dbname: name of the database opened on the client
    :param collname: name of the main collection (``self.coll``)
    :param chunk_size: stored on the instance as ``self.chunk_size``
    """
    Storage.__init__(self, uri)

    # Connect (w=1 write concern) and open the requested database.
    self.client = client = MongoClient(uri, w=1)
    self.mdb = database = client[dbname]
    database.add_son_manipulator(WMASONManipulator())

    # Main collection plus a dedicated one for job results.
    self.collname = collname
    self.coll = collection = database[collname]
    self.jobs = database['jobs']
    self.log(collection)

    self.chunk_size = chunk_size
def __init__(self, uri):
    """ctor with avro uri: avroio:/path/schema.avsc

    ``self.uri`` (set by ``Storage.__init__``) is used as the path of the
    avro schema file; its parent directory becomes ``self.hdir``.

    :param uri: storage uri pointing at the avro schema file
    :raises Exception: if the schema file does not exist
    """
    Storage.__init__(self, uri)
    schema = self.uri
    if not os.path.exists(schema):
        raise Exception("No avro schema file found in provided uri: %s" % uri)
    self.hdir = self.uri.rsplit("/", 1)[0]
    if not os.path.exists(self.hdir):
        os.makedirs(self.hdir)
    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(schema) as schema_file:
        schema_doc = schema_file.read()
    self.schema = avro.schema.parse(schema_doc)
    self.schema_json = json.loads(schema_doc)
def __init__(self, uri, compress=True):
    """Create an HDFS avro storage from a ``hdfsio:/path/schema.avsc`` uri.

    ``self.uri`` (set by ``Storage.__init__``) is used as the HDFS path of
    the avro schema; its parent directory becomes ``self.hdir``.

    :param uri: storage uri pointing at the avro schema on HDFS
    :param compress: stored on the instance as ``self.compress``
    :raises Exception: if ``hdfs.ls`` returns nothing for the schema path
        or if the parent path is not an HDFS directory
    """
    Storage.__init__(self, uri)

    schema_path = self.uri
    if not hdfs.ls(schema_path):
        raise Exception("No avro schema file found in provided uri: %s" % uri)

    # Everything before the last "/" is the directory on HDFS.
    parent_dir = schema_path.rsplit('/', 1)[0]
    self.hdir = parent_dir
    if not hdfs.path.isdir(parent_dir):
        raise Exception('HDFS path %s does not exists' % parent_dir)

    self.schema = avro.schema.parse(hdfs.load(schema_path))
    self.compress = compress
def __init__(self, uri):
    """ctor with avro uri: avroio:/path/schema.avsc

    ``self.uri`` (set by ``Storage.__init__``) is used as the path of the
    avro schema file; its parent directory becomes ``self.hdir``.

    :param uri: storage uri pointing at the avro schema file
    :raises Exception: if the schema file does not exist
    """
    Storage.__init__(self, uri)
    schema = self.uri
    if not os.path.exists(schema):
        raise Exception("No avro schema file found in provided uri: %s" % uri)
    self.hdir = self.uri.rsplit('/', 1)[0]
    if not os.path.exists(self.hdir):
        os.makedirs(self.hdir)
    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(schema) as schema_file:
        schema_doc = schema_file.read()
    self.schema = avro.schema.parse(schema_doc)
    self.schema_json = json.loads(schema_doc)
def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
    """ctor with mongo uri: mongodb://host:port

    :param uri: MongoDB connection uri passed to ``MongoClient``
    :param dbname: name of the database opened on the client
    :param collname: name of the main collection (``self.coll``)
    :param chunk_size: stored on the instance as ``self.chunk_size``

    Index creation on the main collection is best-effort: a failure there
    does not abort construction.
    """
    Storage.__init__(self, uri)
    self.client = MongoClient(uri, w=1)
    self.mdb = self.client[dbname]
    self.mdb.add_son_manipulator(WMASONManipulator())
    self.collname = collname
    self.coll = self.mdb[collname]
    self.jobs = self.mdb['jobs']  # separate collection for job results
    self.acol = self.mdb['acol']  # separate collection for aggregated results
    self.log(self.coll)
    self.chunk_size = chunk_size
    try:
        self.coll.ensure_index([('wmaid', DESCENDING)], unique=True)
        self.coll.ensure_index([('wmats', DESCENDING), ('stype', DESCENDING)])
    except Exception:
        # Best effort: keep going without the indexes. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        pass
def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
    """ctor with mongo uri: mongodb://host:port

    :param uri: MongoDB connection uri passed to ``MongoClient``
    :param dbname: name of the database opened on the client
    :param collname: name of the main collection (``self.coll``)
    :param chunk_size: stored on the instance as ``self.chunk_size``

    Optional JSON files are loaded when the corresponding environment
    variable is set to a file name:

    - ``WMARCHIVE_ERROR_CODES`` -> ``self.codes``
    - ``WMARCHIVE_PERF_METRICS`` -> ``self.metrics``

    Both attributes default to an empty dict when the variable is unset.
    """
    Storage.__init__(self, uri)
    self.client = MongoClient(uri, w=1)
    self.mdb = self.client[dbname]
    self.mdb.add_son_manipulator(WMASONManipulator())
    self.collname = collname
    self.coll = self.mdb[collname]
    self.jobs = self.mdb['jobs']  # separate collection for job results
    self.log(self.coll)
    self.chunk_size = chunk_size

    # read error codes
    self.codes = {}  # dict of numeric codes
    fname = os.environ.get('WMARCHIVE_ERROR_CODES', '')
    if fname:
        with open(fname, 'r') as exit_codes_file:
            self.codes = json.load(exit_codes_file)

    # read performance metrics
    # Always define the attribute so later reads cannot raise AttributeError
    # when the environment variable is not set.
    # NOTE(review): empty dict mirrors self.codes -- confirm consumers of
    # self.metrics accept an empty dict.
    self.metrics = {}
    fname = os.environ.get('WMARCHIVE_PERF_METRICS', '')
    if fname:
        with open(fname, 'r') as metrics_file:
            self.metrics = json.load(metrics_file)
def __init__(self, uri):
    """Create a file-based storage from a ``fileio:/path`` uri.

    ``self.uri`` (set by ``Storage.__init__``) is used as a directory
    path; the directory is created when nothing exists at that path yet.

    :param uri: storage uri naming the target directory
    """
    Storage.__init__(self, uri)
    target_dir = self.uri
    if os.path.exists(target_dir):
        # Nothing to do, the path is already present.
        return
    os.makedirs(target_dir)