def dataset(self, args, meta):
    """Build an "SQL" node describing the file listing of one dataset.

    ``args`` must contain exactly two items: an object whose ``M``
    attribute unpacks into ``(namespace, name)``, and a metadata
    expression that is forwarded untouched.  The ``meta`` argument is
    accepted but not used by this method.

    Returns a ``Node`` of type ``"SQL"`` whose ``meta`` is the value
    produced by ``DBDataset._list_files_sql``.
    """
    assert len(args) == 2
    name_node, meta_expression = args
    ds_namespace, ds_name = name_node.M
    # The fourth positional argument is the keep-metadata flag, which is
    # always switched on here; the relationship is fixed to "self".
    listing_sql = DBDataset._list_files_sql(
        ds_namespace, ds_name, False, True, meta_expression, "self", None
    )
    return Node("SQL", meta=listing_sql)
def dataset(self, args):
    """List the files of a dataset, optionally filtered by metadata.

    ``args[0]`` unpacks into ``(namespace, name)``.  An optional second
    item is a metadata expression; when it is present and truthy it is
    converted with ``self.meta_exp_to_sql`` and used as the listing
    condition, otherwise no condition is applied.

    Returns whatever ``list_files`` yields, always requested with
    metadata.
    """
    assert len(args) in (1, 2)
    ds_namespace, ds_name = args[0]
    ds = DBDataset.get(self.DB, ds_namespace, ds_name)

    meta_expression = args[1] if len(args) >= 2 else None
    if meta_expression:
        sql_condition = self.meta_exp_to_sql(meta_expression)
    else:
        sql_condition = None

    return ds.list_files(condition=sql_condition, with_metadata=True)
def dataset(self, args, meta, provenance=None):
    """Return the file set of a dataset, filtered by a metadata condition.

    ``args`` must contain exactly two items: an object whose ``M``
    attribute unpacks into ``(namespace, name)``, and a condition that is
    handed to ``list_files`` as-is.  ``meta["keep_meta"]`` decides whether
    metadata is requested.  ``provenance`` selects the relationship and
    falls back to ``"self"`` when it is ``None``.

    The result is asserted to be a ``DBFileSet`` before it is returned.
    """
    assert len(args) == 2
    name_node, condition = args
    ds_namespace, ds_name = name_node.M
    ds = DBDataset.get(self.DB, ds_namespace, ds_name)
    with_meta = meta["keep_meta"]

    if provenance is None:
        relationship = "self"
    else:
        relationship = provenance

    file_set = ds.list_files(
        condition=condition,
        relationship=relationship,
        with_metadata=with_meta,
    )
    assert isinstance(file_set, DBFileSet)
    return file_set
def dataset(self, args, value, provenance=None):
    """Return the file set of a dataset, filtered by a metadata expression.

    ``args`` must contain exactly two items: an object whose ``V``
    attribute unpacks into ``(namespace, name)``, and a metadata
    expression.  A non-``None`` expression is converted with
    ``self.meta_exp_to_sql``; ``None`` means no condition.  ``value`` is
    accepted but not used by this method.  ``provenance`` selects the
    relationship and falls back to ``"self"`` when it is ``None``.

    Files are always requested with metadata, and the result is asserted
    to be a ``DBFileSet`` before it is returned.
    """
    assert len(args) == 2
    name_node, meta_expression = args
    ds_namespace, ds_name = name_node.V
    ds = DBDataset.get(self.DB, ds_namespace, ds_name)

    if meta_expression is None:
        sql_condition = None
    else:
        sql_condition = self.meta_exp_to_sql(meta_expression)

    if provenance is None:
        relationship = "self"
    else:
        relationship = provenance

    file_set = ds.list_files(
        condition=sql_condition,
        relationship=relationship,
        with_metadata=True,
    )
    assert isinstance(file_set, DBFileSet)
    return file_set
# Populate dataset "K" in namespace "test" with 1000 files, each carrying
# a small metadata record.
#
# Usage: the first command-line argument is the psycopg2 connection string.
import getopt
import sys

import psycopg2

from dbobjects import DBDataset, DBFile

connstr = sys.argv[1]
conn = psycopg2.connect(connstr)

namespace = "test"
dataset = DBDataset(conn, namespace, "K").save()

for index in range(1000):
    # Zero-padded three-digit stem, e.g. 7 -> "007".
    stem = f"{index:03d}"
    db_file = DBFile(conn, namespace, stem + ".dat")
    db_file.save()
    # One metadata value of each kind: int, string, float and boolean.
    db_file.save_metadata(
        {
            "i": index,
            "s": stem,
            "f": float(index * index),
            "b": index % 2 == 0,
        }
    )
    dataset.add_file(db_file)
# Create one dataset in the "dune" namespace of the output database for
# every distinct file_type_desc found in the input database's file_types
# table.
#
# Connection strings are read from cfg.yaml in the current directory,
# under the keys "in" (source database) and "out" (target database).
import sys

import psycopg2
import yaml

from dbobjects import DBDataset

# yaml.load() without an explicit Loader is rejected by PyYAML 6 and can
# construct arbitrary objects on older versions.  The config only holds
# plain strings, so safe_load is sufficient.  The context manager also
# closes the file handle, which the bare open() call leaked.
with open("cfg.yaml", "r") as cfg_file:
    cfg = yaml.safe_load(cfg_file)

in_conn_str = cfg["in"]
out_conn_str = cfg["out"]

in_conn = psycopg2.connect(in_conn_str)
out_conn = psycopg2.connect(out_conn_str)

namespace = "dune"

cin = in_conn.cursor()
cin.execute("select distinct file_type_desc from file_types")
# Each row is a 1-tuple holding the file type description, which becomes
# the dataset name.
for (dsname,) in cin.fetchall():
    DBDataset(out_conn, namespace, dsname).save()
    print("dataset %s:%s created" % (namespace, dsname))