def get_udoc_oids(self):
    """
    @brief Get the list of changed/removed UDOC OIDs for comparison

    Every (operator class name, index method) pair recorded in self._udoc
    is collected, then the operator classes whose namespace is
    self._schema_oid are queried and the OIDs of those matching a recorded
    pair are returned.

    @return List of the 'oid' values of the matching rows, in the order the
            query returned them
    """
    tracked_pairs = {
        (opc_name, entry['index'])
        for opc_name, entries in self._udoc.items()
        for entry in entries
    }

    # The pg_opclass column joined against pg_am.oid is 'opcmethod' on
    # postgres and on greenplum >= 5.0, and 'opcamid' on everything else.
    if self._portid == 'postgres':
        am_column = 'opcmethod'
    elif (self._portid == 'greenplum' and
          is_rev_gte(get_rev_num(self._dbver), get_rev_num('5.0'))):
        am_column = 'opcmethod'
    else:
        am_column = 'opcamid'

    query = """
        SELECT oc.oid, opcname, amname AS index
        FROM pg_opclass AS oc, pg_am as am
        WHERE oc.opcnamespace = {madlib_schema_oid} AND
              oc.{method_col} = am.oid;
        """.format(method_col=am_column,
                   madlib_schema_oid=self._schema_oid)

    return [
        record['oid']
        for record in self._run_sql(query)
        if (record['opcname'], record['index']) in tracked_pairs
    ]
def get_udoc_oids(self):
    """
    @brief Get the list of changed/removed UDOC OIDs for comparison

    Builds the set of (operator class name, index method) pairs listed in
    self._udoc, queries the operator classes whose namespace is
    self._schema_oid, and returns the OIDs of the rows whose
    (opcname, index) pair is in that set.

    @return List of the 'oid' values of the matching rows, in query order
    """
    wanted = {
        (name, item['index'])
        for name, items in self._udoc.items()
        for item in items
    }

    # 'opcmethod' is used on postgres and on greenplum >= 5.0; every other
    # combination joins pg_am through 'opcamid'.
    if self._portid == 'postgres':
        join_col = 'opcmethod'
    elif (self._portid == 'greenplum' and
          is_rev_gte(get_rev_num(self._dbver), get_rev_num('5.0'))):
        join_col = 'opcmethod'
    else:
        join_col = 'opcamid'

    statement = """
        SELECT oc.oid, opcname, amname AS index
        FROM pg_opclass AS oc, pg_am as am
        WHERE oc.opcnamespace = {madlib_schema_oid} AND
              oc.{method_col} = am.oid;
        """.format(method_col=join_col,
                   madlib_schema_oid=self._schema_oid)

    matching_oids = []
    for result_row in self._run_sql(statement):
        pair = (result_row['opcname'], result_row['index'])
        if pair in wanted:
            matching_oids.append(result_row['oid'])
    return matching_oids
def _load(self):
    """
    @brief Load the configuration file

    Selects one changelist file under '<self._maddir>/madpack' based on
    the revision in self._mad_dbrev, parses it as YAML and stores its
    sections on the instance: _newmodule, _udt, _udc (empty dict when the
    section is empty), _udf and _uda (via _load_config_param) and, for
    non-HAWQ ports, _udo and _udoc.
    """
    rev = get_rev_num(self._mad_dbrev)

    # _mad_dbrev = 1.9.1
    if is_rev_gte([1, 9, 1], rev):
        basename = 'changelist_1.9.1_1.12.yaml'
    # _mad_dbrev = 1.10.0
    elif is_rev_gte([1, 10], rev):
        basename = 'changelist_1.10.0_1.12.yaml'
    # _mad_dbrev = 1.11
    else:
        basename = 'changelist.yaml'
    filename = os.path.join(self._maddir, 'madpack', basename)

    # Use a context manager so the file handle is closed as soon as the
    # document is parsed instead of being left for the garbage collector.
    # NOTE(review): yaml.load can construct arbitrary Python objects; this
    # is only acceptable because the changelist ships with the package.
    # Consider yaml.safe_load if that assumption ever changes.
    with open(filename) as changelist:
        config = yaml.load(changelist)

    # Empty YAML sections parse to None; normalise them to empty dicts.
    self._newmodule = config['new module'] or {}
    self._udt = config['udt'] or {}
    self._udc = config['udc'] or {}
    self._udf = self._load_config_param(config['udf'])
    self._uda = self._load_config_param(config['uda'])

    # FIXME remove the following special handling for HAWQ after svec is
    # removed from catalog
    # NOTE(review): both assignments are treated as guarded by the HAWQ
    # check; on HAWQ this method leaves _udo/_udoc untouched, so they must
    # be initialised elsewhere - confirm against the rest of the class.
    if self._portid != 'hawq' and not self._is_hawq2:
        self._udo = self._load_config_param(config['udo'])
        self._udoc = self._load_config_param(config['udoc'])
def _get_relevant_filenames(self, upgrade_from):
    """ Get all changelist files that together describe the upgrade process

    Args:
        @param upgrade_from: List. Version to upgrade from - the format is
            expected to be per the output of get_rev_num

    Returns:
        List of absolute paths of the changelist files that take part in
        the upgrade, in the order glob returned them.

    Raises:
        RuntimeError: if the selected changelists do not form a complete
            path from upgrade_from to self._curr_rev.

    Details:
        Changelist files are named in the format
            changelist_<src>_<dest>.yaml

        When upgrading from 'upgrade_from' to 'self._curr_rev', all
        intermediate changelist files need to be followed to get all
        upgrade steps. This function globs for such files and filters in
        changelists that lie between the desired versions.

        Additional verification: The function also ensures that a valid
        upgrade path exists. Each version in the changelist files needs to
        be seen twice (except upgrade_from and upgrade_to) for a valid
        path. This is verified by performing an xor-like operation by
        adding/deleting from a list.
    """
    output_filenames = []
    upgrade_to = self._curr_rev

    # Versions seen an odd number of times so far. Seeding it with both
    # endpoints means a complete chain leaves the list empty.
    verify_list = [upgrade_from, upgrade_to]

    # The pattern is made absolute, so every path glob returns is absolute.
    glob_filter = os.path.abspath(
        os.path.join(self._maddir, 'madpack', 'changelist*.yaml'))

    for each_ch in glob.glob(glob_filter):
        # Assumption: changelist format is changelist_<src>_<dest>.yaml
        ch_basename = os.path.splitext(os.path.basename(each_ch))[0]
        ch_splits = ch_basename.split('_')
        if len(ch_splits) < 3:
            # No <src>/<dest> sections in the name (e.g. changelist.yaml)
            continue

        src_version, dest_version = [get_rev_num(i) for i in ch_splits[1:3]]

        # file is part of upgrade if
        #   upgrade_to >= dest >= src >= upgrade_from
        if not (is_rev_gte(src_version, upgrade_from) and
                is_rev_gte(upgrade_to, dest_version)):
            continue

        # xor-like toggle: a version seen twice cancels itself out
        for ver in (src_version, dest_version):
            if ver in verify_list:
                verify_list.remove(ver)
            else:
                verify_list.append(ver)

        # each_ch is already absolute; joining it onto another directory
        # would be discarded by os.path.join, so append it as is.
        output_filenames.append(each_ch)

    if verify_list:
        # any version remaining in verify_list implies upgrade path is
        # broken
        raise RuntimeError("Upgrade from {0} to {1} broken due to missing "
                           "changelist files ({2}). ".
                           format(upgrade_from, upgrade_to, verify_list))
    return output_filenames
def test_invalid_path(self):
    """
    Building a ChangeHandler for an upgrade from '1.9' to '1.12' must raise
    RuntimeError.
    """
    # presumably no changelist chain starts at 1.9 - verify against the
    # changelist files shipped next to the tests
    with self.assertRaises(RuntimeError):
        target_rev = get_rev_num('1.12')
        ChangeHandler(
            self._dummy_schema,
            self._dummy_portid,
            self._dummy_con_args,
            self.maddir,
            '1.9',
            self._dummy_hawq2,
            upgrade_to=target_rev)
def _load(self):
    """
    @brief Load the configuration file

    Converts self._mad_dbrev with get_rev_num, asks
    _get_relevant_filenames for the changelist files to apply, and feeds
    each parsed file to _update_objects in the order they were returned.
    """
    installed_rev = get_rev_num(self._mad_dbrev)
    for changelist_path in self._get_relevant_filenames(installed_rev):
        with open(changelist_path) as changelist:
            # NOTE(review): yaml.load can construct arbitrary objects;
            # fine for packaged changelists, unsafe for untrusted files.
            parsed = yaml.load(changelist)
            self._update_objects(parsed)
def _get_current_version(self):
    """ Get current version of MADlib

    Reads the 'version' entry of '<self._maddir>/config/Version.yml',
    converts it to a string and returns the result of get_rev_num on it.

    NOTE(review): the previous description named
    '$MADLIB_HOME/src/config/Version.yml'; that only matches the path built
    here if self._maddir points at '$MADLIB_HOME/src' - confirm.
    """
    relative_path = os.path.join(self._maddir, 'config', 'Version.yml')
    with open(os.path.abspath(relative_path)) as stream:
        contents = yaml.load(stream)
    return get_rev_num(str(contents['version']))
def _get_existing_udoc(self):
    """
    @brief Get the existing UDOCs in the current version

    Queries the operator classes in the namespace named by the lower-cased
    self._schema and stores them in self._existing_udoc, a
    defaultdict(list) mapping each operator class name to a list of
    {'index': <access method name>} dicts.
    """
    gte_gpdb5 = (self._portid == 'greenplum' and
                 is_rev_gte(get_rev_num(self._dbver), get_rev_num('5.0')))
    # The pg_opclass column joined against pg_am.oid is 'opcmethod' on
    # postgres and greenplum >= 5.0, and 'opcamid' otherwise.
    if self._portid == 'postgres' or gte_gpdb5:
        method_col = 'opcmethod'
    else:
        method_col = 'opcamid'

    # Name the format fields explicitly instead of passing **locals(),
    # which handed every local (including self) to str.format.
    # NOTE(review): the schema name is interpolated straight into the SQL
    # text; this assumes it comes from trusted configuration.
    rows = self._run_sql("""
        SELECT opcname, amname AS index
        FROM pg_opclass AS oc, pg_namespace AS ns, pg_am as am
        WHERE oc.opcnamespace = ns.oid AND
              oc.{method_col} = am.oid AND
              ns.nspname = '{schema}';
        """.format(schema=self._schema.lower(), method_col=method_col))

    self._existing_udoc = defaultdict(list)
    for row in rows:
        self._existing_udoc[row['opcname']].append({'index': row['index']})
def _get_existing_udoc(self):
    """
    @brief Get the existing UDOCs in the current version

    Looks up the operator classes whose namespace is the lower-cased
    self._schema and records them in self._existing_udoc, a
    defaultdict(list) keyed by operator class name whose values are lists
    of {'index': <access method name>} dicts.
    """
    gte_gpdb5 = (self._portid == 'greenplum' and
                 is_rev_gte(get_rev_num(self._dbver), get_rev_num('5.0')))
    # 'opcmethod' is used on postgres and greenplum >= 5.0; every other
    # combination joins pg_am through 'opcamid'.
    if self._portid == 'postgres' or gte_gpdb5:
        method_col = 'opcmethod'
    else:
        method_col = 'opcamid'

    # Pass the format fields by name; **locals() exposed every local
    # (including self) to str.format and hid which ones the query uses.
    # NOTE(review): the schema name is embedded directly in the SQL text;
    # this assumes it originates from trusted configuration.
    query = """
        SELECT opcname, amname AS index
        FROM pg_opclass AS oc, pg_namespace AS ns, pg_am as am
        WHERE oc.opcnamespace = ns.oid AND
              oc.{method_col} = am.oid AND
              ns.nspname = '{schema}';
        """.format(schema=self._schema.lower(), method_col=method_col)
    rows = self._run_sql(query)

    self._existing_udoc = defaultdict(list)
    for row in rows:
        self._existing_udoc[row['opcname']].append({'index': row['index']})
def test_valid_path(self):
    """
    Build a ChangeHandler for an upgrade from '1.9.1' to '1.12' and check
    the new modules, the changed UDTs and the forest_train UDF signatures
    it exposes.
    """
    ch = ChangeHandler(self._dummy_schema, self._dummy_portid,
                       self._dummy_con_args, self.maddir,
                       '1.9.1', self._dummy_hawq2,
                       upgrade_to=get_rev_num('1.12'))

    expected_modules = [
        'knn', 'sssp', 'apsp', 'measures', 'stratified_sample',
        'encode_categorical', 'bfs', 'mlp', 'pagerank',
        'train_test_split', 'wcc',
    ]
    # Compare sorted lists: the iteration order of the keys is not part of
    # the contract, and on Python 3 keys() is a view that never compares
    # equal to a list.
    self.assertEqual(sorted(ch.newmodule.keys()), sorted(expected_modules))

    self.assertEqual(ch.udt, {'kmeans_result': None, 'kmeans_state': None})

    self.assertEqual(ch.udf['forest_train'], [
        {
            'argument': 'text, text, text, text, text, text, text, '
                        'integer, integer, boolean, integer, integer, '
                        'integer, integer, integer, text, boolean, '
                        'double precision',
            'rettype': 'void'
        },
        {
            'argument': 'text, text, text, text, text, text, text, '
                        'integer, integer, boolean, integer, integer, '
                        'integer, integer, integer, text, boolean',
            'rettype': 'void'
        },
        {
            'argument': 'text, text, text, text, text, text, text, '
                        'integer, integer, boolean, integer, integer, '
                        'integer, integer, integer, text',
            'rettype': 'void'
        },
    ])
def test_valid_path(self):
    """
    Construct a ChangeHandler for an upgrade from '1.9.1' to '1.12' and
    verify its new-module names, changed UDTs and the recorded
    forest_train UDF signatures.
    """
    ch = ChangeHandler(self._dummy_schema, self._dummy_portid,
                       self._dummy_con_args, self.maddir,
                       '1.9.1', self._dummy_hawq2,
                       upgrade_to=get_rev_num('1.12'))

    # Key order is not meaningful, and on Python 3 keys() returns a view
    # that is never equal to a list, so compare sorted lists instead.
    self.assertEqual(
        sorted(ch.newmodule.keys()),
        sorted(['knn', 'sssp', 'apsp', 'measures', 'stratified_sample',
                'encode_categorical', 'bfs', 'mlp', 'pagerank',
                'train_test_split', 'wcc']))

    self.assertEqual(ch.udt, {'kmeans_result': None, 'kmeans_state': None})

    self.assertEqual(ch.udf['forest_train'],
                     [{'argument': 'text, text, text, text, text, text, text, '
                                   'integer, integer, boolean, integer, integer, '
                                   'integer, integer, integer, text, boolean, '
                                   'double precision',
                       'rettype': 'void'},
                      {'argument': 'text, text, text, text, text, text, text, '
                                   'integer, integer, boolean, integer, integer, '
                                   'integer, integer, integer, text, boolean',
                       'rettype': 'void'},
                      {'argument': 'text, text, text, text, text, text, text, '
                                   'integer, integer, boolean, integer, integer, '
                                   'integer, integer, integer, text',
                       'rettype': 'void'}])