Example #1
def lexicographical(row_ids, col_ids, sort_rows, sort_cols):
    dups = duplicates(row_ids) + duplicates(col_ids)
    if dups:
        return {'error_msg': 'Duplicate identifiers: {}'.format(dups)}
    rpart = argsort(row_ids) if sort_rows else list(irange(len(row_ids)))
    cpart = argsort(col_ids) if sort_cols else list(irange(len(col_ids)))
    rowp, colp = get_inverse_perm(rpart, cpart)
    return _pack(rowp, colp)
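All of these examples lean on a duplicates() helper that is not shown. A minimal sketch of one common shape for it, assuming it returns the repeated items in first-occurrence order (Example #4 below evidently uses a different variant, sketched there):

from collections import Counter

def duplicates(iterable):
    # Count every item, then report each one that appears more
    # than once, preserving first-occurrence order.
    counts = Counter(iterable)
    seen = set()
    dups = []
    for item in iterable:
        if counts[item] > 1 and item not in seen:
            seen.add(item)
            dups.append(item)
    return dups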
Example #2
def to_bipartite_from_test_string(mat_str):
    # Ignores the optional opt entry in the tuple (dm_decomp does not have opt)
    rows = mat_str[0].split()
    cols_rowwise = [line.split() for line in mat_str[1].splitlines()]
    # check rows for typos
    eqs = set(rows)
    assert len(eqs) == len(rows), (sorted(eqs), sorted(rows))
    assert len(rows) == len(cols_rowwise)
    # check cols for typos
    all_cols = set(chain.from_iterable(cols for cols in cols_rowwise))
    both_row_and_col = sorted(eqs & all_cols)
    assert not both_row_and_col, both_row_and_col
    # check cols for duplicates
    for r, cols in izip(rows, cols_rowwise):
        dups = duplicates(cols)
        assert not dups, 'Duplicate column IDs {} in row {}'.format(dups, r)
    #print(rows)
    #print(cols_rowwise)
    g = nx.Graph()
    g.add_nodes_from(rows)
    g.add_nodes_from(all_cols)
    g.add_edges_from(
        (r, c) for r, cols in izip(rows, cols_rowwise) for c in cols)
    assert is_bipartite_node_set(g, eqs)
    return g, eqs
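A hypothetical call, assuming the two-part test-string format the function parses: the first element lists the row (equation) IDs, and the second holds one whitespace-separated line of column IDs per row:

mat_str = ('eq1 eq2', 'x y\ny z')
g, eqs = to_bipartite_from_test_string(mat_str)
# g is a bipartite graph with equations {'eq1', 'eq2'} on one side
# and variables {'x', 'y', 'z'} on the other; eqs is the row ID set.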
Example #3
def __add_arities(operators, arity):
    opcodes = operators[::2]
    dups = duplicates(opcodes)
    assert not dups, dups
    already_added = set(opcodes) & set(ARITY)
    assert not already_added, already_added
    ARITY.update(izip(opcodes, repeat(arity)))
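A hypothetical usage, assuming operators alternates opcode and symbol (so operators[::2] picks out the opcodes) and ARITY is the module-level opcode-to-arity table:

ARITY = {}  # assumed module-level table
__add_arities(['add', '+', 'sub', '-'], 2)
__add_arities(['neg', '-'], 1)
# ARITY == {'add': 2, 'sub': 2, 'neg': 1}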
Example #4
	def group_files_sat(self, manifest):
		"""
		Group all the satellite files from a downloaded manifest.

		:param manifest: satellite manifest from retrieving
		:return result: grouped and cleaned manifest
		"""
		if not manifest:
			return manifest
		result = Dict({})
		geore = re.compile(r'%s' % self.geo_prefix)
		pregeore = re.compile(r'%s' % self.pre_geo_prefix)
		firere = re.compile(r'%s' % self.fire_prefix)
		keys = np.array(list(manifest.keys()))
		labels = np.array([''.join(k.split('.')[1:3]) for k in keys])
		indexes = duplicates(labels)
		for k, ind in indexes.items():
			lenind = len(ind)
			if lenind != 2:
				logging.warning('group_files_sat: number of geo and fire granules %d different than 2' % lenind)
				if lenind < 2:
					logging.error('group_files_sat: geo or fire file is missing, number of granules %d different than 2' % lenind)
					continue
			geo = list(filter(geore.search, keys[ind]))
			pregeo = list(filter(pregeore.search, keys[ind]))
			fire = list(filter(firere.search, keys[ind]))
			if not geo:
				if pregeo:
					geok = pregeo[0]
				else:
					logging.error('group_files_sat: no geo data in the manifest')
					continue
			else:
				geok = geo[0]

			if fire:
				firek = fire[0]
			else:
				logging.error('group_files_sat: no fire data in the manifest')
				continue

			logging.info('group_files_sat: %s - geo %s and fire %s' % (k, geok, firek))
			try:
				r = Dict({
					'time_start_iso': manifest[geok]['time_start'],
					'time_end_iso': manifest[geok]['time_end'],
					'geo_url': manifest[geok]['url'],
					'geo_local_path': manifest[geok]['local_path'],
					'geo_description': manifest[geok]['dataset_id'],
					'fire_url': manifest[firek]['url'],
					'fire_local_path': manifest[firek]['local_path'],
					'fire_description': manifest[firek]['dataset_id']
				})
				result.update({k: r})
			except Exception as e:
				logging.error('group_files_sat: when creating manifest with error %s' % str(e))
				continue
		return result
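This example evidently uses a different duplicates() variant: one returning a dict that maps each label to the indexes where it occurs (note indexes.items() and the fancy indexing keys[ind]). A plausible sketch, assuming every label is reported so the caller can also catch the single-granule case:

from collections import defaultdict

def duplicates(values):
    # Group array indexes by label; the caller expects each
    # label to pair exactly one geo and one fire granule.
    positions = defaultdict(list)
    for i, v in enumerate(values):
        positions[v].append(i)
    return dict(positions)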
Example #5
    def load(self, fpath):
        from exprparser import parse

        with open(os.path.join(config.input_directory, fpath), "rb") as f:
            reader = csv.reader(f)
            lines = skip_comment_cells(strip_rows(reader))
            header = lines.next()
            self.expressions = [parse(s, autovariables=True) for s in header]
            table = []
            for line in lines:
                if any(value == "" for value in line):
                    raise Exception("empty cell found in %s" % fpath)
                table.append([eval(value) for value in line])
        ndim = len(header)
        unique_last_d, dupe_last_d = unique_duplicate(table.pop(0))
        if dupe_last_d:
            print(
                "Duplicate column header value(s) (for '%s') in '%s': %s"
                % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d))
            )
            raise Exception(
                "bad alignment data in '%s': found %d " "duplicate column header value(s)" % (fpath, len(dupe_last_d))
            )

        # strip the ndim-1 first columns
        headers = [[line.pop(0) for line in table] for _ in range(ndim - 1)]

        possible_values = [list(unique(values)) for values in headers]
        if ndim > 1:
            # having duplicate values is normal when there are more than 2
            # dimensions but we need to test whether there are duplicates of
            # combinations.
            dupe_combos = list(duplicates(zip(*headers)))
            if dupe_combos:
                print("Duplicate row header value(s) in '%s':" % fpath)
                print(PrettyTable(dupe_combos))
                raise Exception(
                    "bad alignment data in '%s': found %d " "duplicate row header value(s)" % (fpath, len(dupe_combos))
                )

        possible_values.append(unique_last_d)
        self.possible_values = possible_values
        self.probabilities = list(chain.from_iterable(table))
        num_possible_values = prod(len(values) for values in possible_values)
        if len(self.probabilities) != num_possible_values:
            raise Exception(
                "incoherent alignment data in '%s': %d data cells "
                "found while it should be %d based on the number "
                "of possible values in headers (%s)"
                % (
                    fpath,
                    len(self.probabilities),
                    num_possible_values,
                    " * ".join(str(len(values)) for values in possible_values),
                )
            )
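Examples #5 and #6 also rely on a unique_duplicate() helper that is not shown. A minimal sketch, assuming it splits a sequence into its unique values (in order) and the values that occur more than once:

def unique_duplicate(iterable):
    # Single pass: collect first occurrences and note repeats.
    seen, uniques, dupes = set(), [], []
    for v in iterable:
        if v in seen:
            if v not in dupes:
                dupes.append(v)
        else:
            seen.add(v)
            uniques.append(v)
    return uniques, dupes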
Example #6
def load_ndarray(fpath, celltype=None):
    print(" - reading", fpath)
    with open(fpath, "rb") as f:
        reader = csv.reader(f)
        line_stream = skip_comment_cells(strip_rows(reader))
        header = line_stream.next()
        str_table = []
        for line in line_stream:
            if any(value == '' for value in line):
                raise Exception("empty cell found in %s" % fpath)
            str_table.append(line)
    ndim = len(header)

    # handle last dimension header (horizontal values)
    last_d_header = str_table.pop(0)
    # auto-detect type of values for the last d and convert them
    last_d_pvalues = convert_1darray(last_d_header)

    unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues)
    if dupe_last_d:
        print(("Duplicate column header value(s) (for '%s') in '%s': %s"
              % (header[-1], fpath,
                 ", ".join(str(v) for v in dupe_last_d))))
        raise Exception("bad data in '%s': found %d "
                        "duplicate column header value(s)"
                        % (fpath, len(dupe_last_d)))

    # handle other dimensions header

    # strip the ndim-1 first columns
    headers = [[line.pop(0) for line in str_table]
               for _ in range(ndim - 1)]
    headers = [convert_1darray(pvalues_str) for pvalues_str in headers]
    if ndim > 1:
        # having duplicate values is normal when there are more than 2
        # dimensions but we need to test whether there are duplicates of
        # combinations.
        dupe_combos = list(duplicates(zip(*headers)))
        if dupe_combos:
            print(("Duplicate row header value(s) in '%s':" % fpath))
            print((PrettyTable(dupe_combos)))
            raise Exception("bad alignment data in '%s': found %d "
                            "duplicate row header value(s)"
                            % (fpath, len(dupe_combos)))

    possible_values = [np.array(list(unique(pvalues))) for pvalues in headers]
    possible_values.append(np.array(unique_last_d))

    shape = tuple(len(values) for values in possible_values)
    num_possible_values = prod(shape)

    # transform the 2d table into a 1d list
    str_table = list(chain.from_iterable(str_table))
    if len(str_table) != num_possible_values:
        raise Exception("incoherent data in '%s': %d data cells "
                        "found while it should be %d based on the number "
                        "of possible values in headers (%s)"
                        % (fpath,
                           len(str_table),
                           num_possible_values,
                           ' * '.join(str(len(values))
                                      for values in possible_values)))

    # TODO: compare time with numpy built-in conversion:
    # if dtype is None, numpy tries to detect the best type itself
    # which it does a good job of if the values are already numeric values
    # if dtype is provided, numpy does a good job to convert from string
    # values.
    if celltype is None:
        celltype = detect_column_type(str_table)
    data = convert_1darray(str_table, celltype)
    array = np.array(data, dtype=celltype)
    return LabeledArray(array.reshape(shape), header, possible_values)
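For reference, the CSV layout this parser appears to expect, as inferred from the code above (a header row naming each dimension, then one row of values for the last dimension, then data rows whose first ndim - 1 cells label the other dimensions):

gender,age,period      <- dimension names (ndim = 3)
2000,2001              <- possible values of the last dimension
F,0,1.0,2.0
F,1,3.0,4.0
M,0,5.0,6.0
M,1,7.0,8.0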