def read_first_matrix(specs, delete_failures=False):
    """
    Return the matrix from the first spec whose file exists and can be read.

    specs is an iterable of dictionaries, each describing one candidate
    location. The following keys are recognized:

    - path: path to the file (required)
    - transpose: whether to transpose the matrix after reading it.
      Defaults to False.
    - file_format: format passed to read_matrix. Defaults to 'infer'.

    Candidates are tried in order. A candidate that is not an existing file
    is skipped. A candidate that exists but fails to load is logged as a
    warning and skipped; when delete_failures is true the offending file is
    also removed from disk.

    Raises FileNotFoundError, listing every path that was tried, when no
    candidate yields a matrix.
    """
    attempted = []
    for spec in specs:
        candidate = pathlib.Path(spec['path'])
        attempted.append(str(candidate))
        if candidate.is_file():
            try:
                matrix = read_matrix(
                    candidate,
                    file_format=spec.get('file_format', 'infer'),
                )
            except Exception as error:
                # Best effort: report the failure and fall through to the
                # next candidate instead of aborting the whole search.
                logging.warning(
                    f'Error reading matrix at {candidate}:\n{error}')
                if delete_failures:
                    candidate.unlink()
                    logging.warning(f'Deleting file at {candidate}')
            else:
                if spec.get('transpose', False):
                    return matrix.transpose()
                return matrix
    listing = '\n'.join(attempted)
    raise FileNotFoundError(
        'No matrix files found at the specified paths:\n' + listing)
def normalize(matrix, vector, axis, damping_exponent):
    """
    Scale the rows or columns of a 2D matrix by a damped vector.

    Each row (or column) i of matrix is multiplied by
    vector[i] ** -damping_exponent. Entries of vector whose scaling factor
    is infinite (for example zeros raised to a negative power) contribute a
    factor of 0 instead.

    Parameters
    ==========
    matrix : numpy.ndarray or scipy.sparse
        Two-dimensional matrix to normalize.
    vector : numpy.ndarray
        Vector used for row or column normalization of matrix. It is not
        modified; integer vectors are promoted to float64 for the scaling.
    axis : str
        'rows' or 'columns' for which axis to normalize. Any value other
        than 'rows' is treated as 'columns'.
    damping_exponent : float
        exponent to use in scaling a node's row or column

    Returns
    =======
    The scaled matrix. When damping_exponent is 0 the input matrix object
    itself is returned untouched.
    """
    assert matrix.ndim == 2
    assert vector.ndim == 1
    if damping_exponent == 0:
        return matrix
    # Work on a private copy so the caller's vector is left intact. Keep
    # float/complex dtypes as they are; promote everything else to float64
    # because negative powers are not representable in integer arrays.
    if numpy.issubdtype(vector.dtype, numpy.inexact):
        scale_dtype = vector.dtype
    else:
        scale_dtype = numpy.float64
    scale = numpy.array(vector, dtype=scale_dtype)
    with numpy.errstate(divide='ignore'):
        scale **= -damping_exponent
    # Zero entries become inf under a negative exponent; neutralize them.
    scale[numpy.isinf(scale)] = 0
    diagonal = scipy.sparse.diags(scale)
    if axis == 'rows':
        # equivalent to `diagonal @ matrix` but returns scipy.sparse.csc
        # not scipy.sparse.csr
        return (matrix.transpose() @ diagonal).transpose()
    return matrix @ diagonal