Example #1
    def test_mark_route_datapoint(self, mock_gmaps):
        map_payload = [
            {
                'summary': 'US-101 S',
                'legs': [
                    {
                        'distance': { 'text': 'distance' },
                        'duration_in_traffic': { 'value': 1111 }
                    }
                ],
                'duration': { 'value': 9999 }   # default duration
            }
        ]
        # TODO: assert the parameters passed to directions()
        mock_gmaps.return_value.directions.return_value = map_payload

        endpoint = '/api/v1/timings/1'
        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 204)

        key = ndb.Key('Route', 1)
        query_results = Timing.query(ancestor=key).fetch(2)
        self.assertEqual(len(query_results), 1)

        test_data = query_results.pop()
        self.assertEqual(test_data.duration, 1111)
        self.assertEqual(test_data.distance, 'distance')
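
The test above leans on scaffolding the snippet does not show. A minimal sketch of the assumed test-class setup; the `main` module name, the patch target, and the testbed stubs are assumptions, not part of the original example:

import unittest

import mock  # standalone mock package; on Python 3, unittest.mock works the same way
from google.appengine.ext import testbed

import main  # hypothetical module exposing the Flask app and the Gmaps wrapper


@mock.patch('main.Gmaps')  # assumed import path; injects mock_gmaps into each test method
class TimingsApiTestCase(unittest.TestCase):

    def setUp(self):
        # in-memory datastore and memcache stubs for the test run
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        self.app = main.app.test_client()

    def tearDown(self):
        self.testbed.deactivate()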
Example #2
def measure_timing(route_id):
    google_maps = Gmaps()
    result = google_maps.lookup_travel_time(route_id=route_id - 1)

    parent_key = ndb.Key('Route', route_id)
    Timing(parent=parent_key,
           distance=result['distance'],
           duration=result['duration']).put()

    return '', 204
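
The `route_id - 1` suggests the Gmaps wrapper indexes its route list from zero while the API uses 1-based ids. For this handler to be reachable at the endpoint the tests use, a URL rule along these lines is assumed (the route string is inferred from Example #1; `app` is the Flask instance):

from flask import Flask

app = Flask(__name__)

# GET /api/v1/timings/<route_id> records one measurement for that route
app.add_url_rule('/api/v1/timings/<int:route_id>',
                 view_func=measure_timing)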
Example #3
    def test_fetch_timings_amount_for_past_day(self):
        now = datetime.now()
        parent_key = ndb.Key('Route', 5)
        # unrelated data under a different route key
        Timing(parent=ndb.Key('Route', 7), create_time=now, distance='9.9 mi', duration=5555).put()

        # target test data: one datapoint every 10 minutes, newest first
        interval = timedelta(minutes=10)
        for i in range(150):
            timestamp = now - (interval * i)
            Timing(parent=parent_key, create_time=timestamp, distance='10.0 mi', duration=1234 + i).put()

        endpoint = '/api/v1/routes/5/day'
        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)

        body = json.loads(response.data)
        # 150 datapoints were written, but only the 144 from the past 24 hours (6 per hour) are returned
        self.assertEqual(len(body['data']), 144)
        for element in body['data']:
            self.assertEqual(element['distance'], '10.0 mi')
Example #4
    def helper_populate_datastore(self, num_of_elements, route_id):
        parent_key = ndb.Key('Route', route_id)
        now = datetime.now()
        time_interval = timedelta(minutes=10)

        elements = []
        for i in range(num_of_elements):
            timestamp = now - (time_interval * i)

            current = Timing(parent=parent_key, create_time=timestamp, distance='10.0 mi', duration=1234+i)
            elements.append(current)

        return ndb.put_multi(elements)
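
`ndb.put_multi` writes the whole batch in a single call and returns the new entities' keys, which is why the helper's return value can later be handed straight to `ndb.delete_multi`; Example #6 relies on exactly this. A hypothetical usage inside a test method:

keys = self.helper_populate_datastore(num_of_elements=3, route_id=1)
assert len(keys) == 3
ndb.delete_multi(keys)  # removes all three Timing entities again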
Example #5
def get_day_timings(route_id):
    cached_result = check_cache(route_id=route_id)
    if cached_result is not None:
        return reply_payload(payload=cached_result)

    timing_data = Timing.get_past_day(route_id=route_id)
    result = []
    for datapoint in timing_data:
        result.append({
            'distance': datapoint.distance,
            'duration': datapoint.duration,
            # '%s' (seconds since the epoch) is a non-standard strftime code; it works on Unix but not on Windows
            'timestamp': datapoint.create_time.strftime('%s')
        })

    save_cache(route_id=route_id, data=result)

    return reply_payload(payload=result)
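
`Timing`, `check_cache`, `save_cache`, and `reply_payload` are defined elsewhere in the project. A plausible sketch, assuming a plain ndb model and App Engine memcache; every name here, the ancestor-query shape, and the 10-minute TTL are assumptions:

import json
from datetime import datetime, timedelta

from flask import Response
from google.appengine.api import memcache
from google.appengine.ext import ndb


class Timing(ndb.Model):
    create_time = ndb.DateTimeProperty()
    distance = ndb.StringProperty()
    duration = ndb.IntegerProperty()

    @classmethod
    def get_past_day(cls, route_id):
        # ancestor query keeps reads strongly consistent for the route
        cutoff = datetime.now() - timedelta(days=1)
        query = cls.query(ancestor=ndb.Key('Route', route_id))
        return query.filter(cls.create_time >= cutoff).fetch()


def check_cache(route_id):
    return memcache.get('timings:%s' % route_id)


def save_cache(route_id, data):
    memcache.set('timings:%s' % route_id, data, time=600)  # assumed 10-minute TTL


def reply_payload(payload):
    return Response(json.dumps({'data': payload}), mimetype='application/json')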
Example #6
    def test_fetch_cache(self):
        test_keys = self.helper_populate_datastore(num_of_elements=3, route_id=1)

        Timing(parent=ndb.Key('Route', 2), create_time=datetime.now(), distance='9.9 mi', duration=5555).put()

        endpoint = '/api/v1/routes/1/day'
        response = self.app.get(endpoint)
        body = json.loads(response.data)

        self.assertEqual(len(body['data']), 3)

        # wipe the datastore entities; the next request can only be served from cache
        ndb.delete_multi(test_keys)

        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)

        # still three datapoints, so the cached payload was used
        body = json.loads(response.data)
        self.assertEqual(len(body['data']), 3)
Example #7
    def test_fetch_day_timings(self):
        now = datetime.now()
        time_interval = timedelta(minutes=10)
        self.helper_populate_datastore(num_of_elements=3, route_id=11)

        # unrelated data in the set
        Timing(parent=ndb.Key('Route', 2), create_time=now, distance='9.9 mi', duration=5555).put()

        endpoint = '/api/v1/routes/11/day'
        response = self.app.get(endpoint)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.headers['Content-Type'], 'application/json')

        body = json.loads(response.data)

        self.assertEqual(len(body['data']), 3)
        self.assertDictEqual(body, {
            'data': [
                {
                    'distance': '10.0 mi',
                    'duration': 1236,
                    'timestamp': (now - (time_interval * 2)).strftime('%s')
                },
                {
                    'distance': '10.0 mi',
                    'duration': 1235,
                    'timestamp': (now - (time_interval * 1)).strftime('%s')
                },
                {
                    'distance': '10.0 mi',
                    'duration': 1234,
                    'timestamp': now.strftime('%s')
                }
            ]
        })
Example #8
def main():
    """
    Main entry point for the application when run from the command line.
    """

    # Timing instance
    timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])

    with timing.timeit_context_add('Pre-processing'):

        # Setup parse options command line
        current_path = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        parser = args.setup_parser(current_path + '/args/mfbn.json')
        options = parser.parse_args()
        args.update_json(options)
        args.check_output(options)

        if options.input and options.vertices is None:
            print('Vertices are required when input is given.')
            sys.exit(1)

    # Load bipartite graph
    with timing.timeit_context_add('Load graph'):

        source_graph = MGraph()
        source_graph.load(options.input, options.vertices)

    # Coarsening
    with timing.timeit_context_add('Coarsening'):

        kwargs = dict(reduction_factor=options.reduction_factor,
                      max_levels=options.max_levels,
                      matching=options.matching,
                      similarity=options.similarity,
                      itr=options.itr,
                      upper_bound=options.upper_bound,
                      gmv=options.gmv,
                      tolerance=options.tolerance,
                      reverse=options.reverse,
                      seed_priority=options.seed_priority,
                      threads=options.threads)

        coarsening = Coarsening(source_graph, **kwargs)
        coarsening.run()

    # Save
    with timing.timeit_context_add('Save'):

        output = options.output
        for index, (level, coarsened_graph) in enumerate(
                zip(coarsening.hierarchy_levels, coarsening.hierarchy_graphs),
                start=1):

            if options.save_conf or options.show_conf:
                d = {
                    'source_input': options.input,
                    'source_vertices': source_graph['vertices'],
                    'source_vcount': source_graph.vcount(),
                    'source_ecount': source_graph.ecount(),
                    'coarsened_ecount': coarsened_graph.ecount(),
                    'coarsened_vcount': coarsened_graph.vcount(),
                    'coarsened_vertices': coarsened_graph['vertices'],
                    'achieved_levels': coarsened_graph['level'],
                    'reduction_factor': options.reduction_factor,
                    'max_levels': options.max_levels,
                    'similarity': options.similarity,
                    'matching': options.matching,
                    'upper_bound': options.upper_bound,
                    'gmv': options.gmv,
                    'itr': options.itr,
                    'level': level
                }

            if options.save_conf:
                with open(output + '-' + str(index) + '-info.json', 'w+') as f:
                    json.dump(d, f, indent=4)

            if options.show_conf:
                print(json.dumps(d, indent=4))

            if options.save_ncol:
                coarsened_graph.write(output + '-' + str(index) + '.ncol',
                                      format='ncol')

            if options.save_source:
                with open(output + '-' + str(index) + '.source', 'w+') as f:
                    for v in coarsened_graph.vs():
                        f.write(' '.join(map(str, v['source'])) + '\n')

            if options.save_membership:
                membership = [0] * (source_graph['vertices'][0] +
                                    source_graph['vertices'][1])
                for v in coarsened_graph.vs():
                    for source in v['source']:
                        membership[source] = v.index
                numpy.savetxt(output + '-' + str(index) + '.membership',
                              membership,
                              fmt='%d')

            if options.save_predecessor:
                with open(output + '-' + str(index) + '.predecessor',
                          'w+') as f:
                    for v in coarsened_graph.vs():
                        f.write(' '.join(map(str, v['predecessor'])) + '\n')

            if options.save_successor:
                numpy.savetxt(output + '-' + str(index) + '.successor',
                              coarsened_graph.vs['successor'],
                              fmt='%d')

            if options.save_weight:
                numpy.savetxt(output + '-' + str(index) + '.weight',
                              coarsened_graph.vs['weight'],
                              fmt='%d')

            if options.save_gml:
                del coarsened_graph['adjlist']
                del coarsened_graph['similarity']
                coarsened_graph['layers'] = str(coarsened_graph['layers'])
                coarsened_graph['vertices'] = ','.join(
                    map(str, coarsened_graph['vertices']))
                coarsened_graph['level'] = ','.join(
                    map(str, coarsened_graph['level']))
                # plain lists rather than lazy map objects so igraph can store the attributes (Python 3)
                coarsened_graph.vs['name'] = [str(i) for i in range(coarsened_graph.vcount())]
                coarsened_graph.vs['type'] = [str(t) for t in coarsened_graph.vs['type']]
                coarsened_graph.vs['weight'] = [str(w) for w in coarsened_graph.vs['weight']]
                coarsened_graph.vs['successor'] = [str(s) for s in coarsened_graph.vs['successor']]
                for v in coarsened_graph.vs():
                    v['source'] = ','.join(map(str, v['source']))
                    v['predecessor'] = ','.join(map(str, v['predecessor']))
                coarsened_graph.write(output + '-' + str(index) + '.gml',
                                      format='gml')

            if not options.save_hierarchy:
                break

    if options.show_timing:
        timing.print_tabular()
    if options.save_timing_csv:
        timing.save_csv(output + '-timing.csv')
    if options.save_timing_json:
        timing.save_json(output + '-timing.json')
Example #9
class bnoc(object):
    def __init__(self):
        """ Initialize the bnoc app

        For help use:
            > python bnoc.py --help
        """

        self.timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])
        with self.timing.timeit_context_add('Pre-processing'):
            # Setup parse options command line
            current_path = os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe())))
            parser = args.setup_parser(current_path + '/args/bnoc.json')
            self.options = parser.parse_args()
            args.update_json(self.options)
            args.check_output(self.options)
            self.log = helper.initialize_logger(dir='log', output='log')

            if self.options.save_arff and (self.options.x is not None):
                self.log.warning(
                    'Warning: Arff format does not allow overlap in the first layer '
                    '(parameter x). Please use --save_arff=False or suppress the x parameter.'
                )
                sys.exit(1)

            self.layers = len(self.options.vertices)

            self.start_end = []
            for layer in range(self.layers):
                start = sum(self.options.vertices[0:layer])
                end = sum(self.options.vertices[0:layer + 1]) - 1
                self.start_end.append([start, end])

            if self.options.p is None or self.options.balanced is True:
                self.generate_balanced_probabilities()

            for p in self.options.p:
                if round(sum(p), 1) != 1.0:
                    self.log.warning(
                        'Warning: The probabilities in each layer must sum to 1.'
                    )
                    sys.exit(1)

            if self.options.communities is None:
                self.options.communities = [1] * len(self.options.vertices)

            if self.options.x is not None and isinstance(self.options.x, int):
                self.options.x = [self.options.x] * self.layers
            if self.options.y is not None and isinstance(self.options.y, int):
                self.options.y = [self.options.y] * self.layers
            if self.options.z is not None and isinstance(self.options.z, int):
                self.options.z = [self.options.z] * self.layers

            if all(isinstance(item, tuple) for item in self.options.schema):
                self.options.schema = [
                    list(elem) for elem in self.options.schema
                ]
            if not all(isinstance(item, list) for item in self.options.schema):
                # pair consecutive entries: [a, b, c, d] -> [(a, b), (c, d)]
                it = iter(self.options.schema)
                self.options.schema = list(zip(it, it))

            if self.options.mu is not None and isinstance(
                    self.options.mu, (int, float)):
                self.options.mu = [self.options.mu] * len(self.options.schema)
            if self.options.dispersion is not None and isinstance(
                    self.options.dispersion, (int, float)):
                self.options.dispersion = [self.options.dispersion] * len(
                    self.options.schema)
            if self.options.noise is not None and isinstance(
                    self.options.noise, (int, float)):
                self.options.noise = [self.options.noise] * len(
                    self.options.schema)

            for layer, comm in enumerate(self.options.communities):
                if comm == 0:
                    self.log.warning(
                        'The number of communities must be greater than zero.')
                    sys.exit(1)
                if self.options.communities[layer] > self.options.vertices[
                        layer]:
                    self.log.warning(
                        'Warning: The number of communities must be less than the number of vertices.'
                    )
                    sys.exit(1)

            if self.options.x is not None and self.options.z is not None:
                for layer in range(self.layers):
                    if self.options.z[layer] > self.options.communities[layer]:
                        self.log.warning(
                            'Warning: The number of overlapping communities (z) must be '
                            'less than the number of communities in every layer.')
                        sys.exit(1)
                if sum(self.options.x) > 0 and sum(self.options.z) == 0:
                    self.options.z = [2] * len(self.options.x)

    def add_noise(self, matrix, noise):
        """ Insert a noise in adjacent matrix
            Noise or Threshold in (0,1]
        """

        # Removing a fraction of inter-community edges [numpy.random.seed(1)]
        num_samples = numpy.count_nonzero(matrix)
        Z = [False]
        while not any(Z):  # while all elements are 'False'
            Z = numpy.random.rand(num_samples) < noise
            # Z = numpy.random.uniform(0.0, 1.0, num_samples) < noise
        Y = matrix[matrix > 0]
        removed_weights = Y[Z]
        Y[Z] = 0
        if self.options.hard:
            matrix[matrix > 0] = Y

        # Adding a fraction of intra-community edges
        num_samples = numpy.count_nonzero(matrix == 0)
        Z = [False]
        while not any(Z):  # while all elements are 'False'
            # Z = numpy.random.rand(num_samples) < noise
            Z = numpy.random.uniform(0.0, 1.0, num_samples) < noise
        Y = matrix[matrix == 0]
        _mean = numpy.mean(removed_weights, dtype=numpy.float64)
        # _mean = numpy.median(removed_weights)
        if not self.options.hard:
            removed_weights = [_mean] * len(removed_weights)
        removed_weights = list(removed_weights) + (
            [0] * (numpy.count_nonzero(Z) - len(removed_weights)))
        Y[Z] = numpy.random.choice(removed_weights, numpy.count_nonzero(Z))
        matrix[matrix == 0] = Y

        return matrix
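
    # The masking idiom above is easy to misread: matrix[matrix > 0] uses
    # fancy indexing, so Y is a copy of the nonzero entries; editing Y and
    # assigning matrix[matrix > 0] = Y writes the edited vector back in the
    # same order. A standalone toy with the same numpy mechanics:
    #
    #     m = numpy.array([[0.0, 2.0, 3.0], [4.0, 0.0, 5.0]])
    #     values = m[m > 0]                          # copy: [2., 3., 4., 5.]
    #     drop = numpy.random.rand(values.size) < 0.5
    #     values[drop] = 0
    #     m[m > 0] = values                          # zeros written back into m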

    def generate_balanced_probabilities(self):
        """ Generates a list of probabilities for each class when the probabilities
        are not given by the user or the balanced flag is on.
        """

        if self.options.p is None:
            self.options.p = [[] for _ in range(self.layers)]
        for layer in range(self.layers):
            avg = float(1.0 / self.options.communities[layer])
            self.options.p[layer] = [avg] * self.options.communities[layer]
            self.options.p[layer][-1] = float(1.0 -
                                              sum(self.options.p[layer][:-1]))
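
    # Worked example: for a layer with communities = 3 the loop above yields
    # p = [1/3, 1/3, 1/3], and the last entry is recomputed so the vector
    # sums to exactly 1.0 despite floating-point rounding:
    #
    #     avg = 1.0 / 3
    #     p = [avg] * 3
    #     p[-1] = 1.0 - sum(p[:-1])
    #     assert sum(p) == 1.0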

    def create_vertices_and_communities(self):
        """ Creates a list that gives the class for each element in the positioning
        order for one type of element
        """

        self.membership = [[] for i in range(self.layers)]
        for layer in range(self.layers):
            for itr in range(max_itr):
                self.membership[layer] = numpy.random.choice(
                    self.options.communities[layer],
                    size=self.options.vertices[layer],
                    replace=True,
                    p=self.options.p[layer])
                self.membership[layer] = sorted(self.membership[layer])
                unique_row = numpy.unique(self.membership[layer])
                if len(unique_row) == self.options.communities[layer]:
                    break
                if itr == max_itr - 1:  # range(max_itr) ends at max_itr - 1
                    self.log.warning(
                        'Warning: Convergence failure; reduce the number of communities or run again.'
                    )
                    sys.exit(1)

    def create_cover(self):
        """ Creates a list of list that maps from each community to the nodes in it """

        self.unique_comms = [0] * self.layers
        self.cover = [[] for i in range(self.layers)]
        for layer in range(self.layers):
            self.unique_comms[layer] = list(
                range(self.options.communities[layer]))
            self.cover[layer] = numpy.empty(
                (self.options.communities[layer], 0)).tolist()
            for vertex, comm in enumerate(self.membership[layer]):
                self.cover[layer][comm].append(vertex +
                                               self.start_end[layer][0])

    def select_overlapping_vertices(self):
        """ Select x vertices to be member of z communities, as expected by model """

        self.overlap = [[] for i in range(self.layers)]
        if self.options.x is not None and sum(self.options.x) > 0:
            for layer in range(self.layers):
                self.overlap[layer] = numpy.random.choice(
                    range(self.start_end[layer][0],
                          self.start_end[layer][1] + 1),
                    self.options.x[layer],
                    replace=False)
                for vertex in self.overlap[layer]:
                    comms = copy.copy(self.unique_comms[layer])
                    comms.remove(
                        self.membership[layer][vertex -
                                               self.start_end[layer][0]])
                    random.shuffle(comms)
                    # Update communities
                    for comm in comms[:(self.options.z[layer] - 1)]:
                        self.cover[layer][comm].append(vertex)

    def create_biadj_matrix(self, l0, l1, dispersion, mu):
        """ Create an unweighted adjacenty matrix with community structure. """

        # Create an empty bipartite matrix
        matrix = numpy.zeros(
            (self.options.vertices[l0], self.options.vertices[l1]),
            dtype=numpy.float64)
        unique_comms = [self.unique_comms[l0], self.unique_comms[l1]]
        _max = unique_comms.index(max(unique_comms, key=len))
        _min = unique_comms.index(min(unique_comms, key=len))
        # Connect all vertices in each module
        multiplier = math.ceil(
            len(unique_comms[_max]) / float(len(unique_comms[_min])))
        unique_comms[_min] = unique_comms[_min] * int(multiplier)
        unique_comms[_min] = unique_comms[_min][:len(unique_comms[_max])]

        for index in range(len(unique_comms[_max])):
            for u in self.cover[l0][unique_comms[0][index]]:
                for v in self.cover[l1][unique_comms[1][index]]:
                    matrix[u - self.start_end[l0][0],
                           v - self.start_end[l1][0]] = 1

        # Make a large negative binomial distribution
        num_samples = numpy.count_nonzero(matrix)
        # prob = dispersion / (dispersion + mu)
        # prob = ((mu + dispersion * mu ** 2) - mu) / (mu + dispersion * mu ** 2)
        # from scipy.stats import nbinom
        # distribution = nbinom.rvs(dispersion, prob, size=num_samples)
        distribution = numpy.random.negative_binomial(dispersion, 1 - mu,
                                                      num_samples)
        if self.options.normalize:
            distribution = distribution / numpy.linalg.norm(distribution)
        # numpy.set_printoptions(threshold=numpy.nan)
        # print distribution
        matrix[matrix > 0] = distribution

        return matrix
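
    # numpy.random.negative_binomial(n, p) draws the number of failures before
    # the n-th success, with mean n * (1 - p) / p. With n = dispersion and
    # p = 1 - mu, as above, the expected edge weight is dispersion * mu / (1 - mu);
    # e.g. dispersion = 2.0 and mu = 0.5 give a mean weight of 2.0:
    #
    #     samples = numpy.random.negative_binomial(2.0, 0.5, size=100000)
    #     samples.mean()    # close to 2.0 == 2.0 * 0.5 / (1 - 0.5)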

    def save_text(self, output):

        # Save type
        if self.options.save_type:
            with open(output + '.type', 'w+') as f:
                for layer in range(self.layers):
                    for i in range(self.options.vertices[layer]):
                        f.write(str(layer) + '\n')

        # Save overlap
        if self.options.save_overlap:
            for layer in range(self.layers):
                if len(self.overlap[layer]) > 0:
                    with open(output + '.overrow', 'w+') as f:
                        writer = csv.writer(f, delimiter=' ')
                        writer.writerow(self.overlap[layer])

        # Save cover
        if self.options.save_cover:
            for layer in range(self.layers):
                with open(output + '-layer-' + str(layer) + '.cover',
                          'w+') as f:
                    writer = csv.writer(f, delimiter=' ')
                    for values in self.cover[layer]:
                        writer.writerow(values)

        # Save membership
        if self.options.save_membership:
            with open(output + '.membership', 'w+') as f:
                writer = csv.writer(f, delimiter=' ')
                for layer in range(self.layers):
                    clusters = self.cover[layer]
                    _clusters = [list(cluster) for cluster in clusters]
                    _n = max(
                        max(cluster) + 1 for cluster in _clusters if cluster)
                    result = [[] for _ in range(_n)]
                    for idx, cluster in enumerate(clusters):
                        for item in cluster:
                            result[item].append(idx)
                    for sublist in result:
                        if sublist:
                            writer.writerow(sublist)

        # Save bipartite network
        if self.options.save_ncol or self.options.save_gml or self.options.save_arff:
            edgelist = ''
            dict_edges = dict()
            for key, matrix in enumerate(self.matrices):
                l0 = self.options.schema[key][0]
                l1 = self.options.schema[key][1]
                for i in range(matrix.shape[0]):
                    for j in range(matrix.shape[1]):
                        if matrix[i, j] != 0:
                            u = i + self.start_end[l0][0]
                            v = j + self.start_end[l1][0]
                            if self.options.unweighted is False:
                                weight = numpy.around(matrix[i, j], decimals=3)
                            else:
                                weight = 1.0
                            edgelist += '%s %s %s\n' % (u, v, weight)
                            dict_edges[(u, v)] = float(weight)

        # Save ncol
        if self.options.save_ncol:
            with open(output + '.ncol', 'w+') as f:
                f.write(edgelist)

        # Save arff
        if self.options.save_arff:
            self.log.warning('Arff format still under development.')
            sys.exit(1)

    def save_npy(self, output):

        # Save npy
        if self.options.save_ncol:
            numpy.save(output + '-matrices.npy', self.matrices)

        # Save type
        if self.options.save_type:
            types = []  # avoid shadowing the builtin 'type'
            for layer, vertices in enumerate(self.options.vertices):
                types.extend([layer] * vertices)
            numpy.save(output + '-type.npy', types)

        # Save overlap
        if self.options.save_overlap:
            numpy.save(output + '-overlap.npy', self.overlap)

        # Save cover
        if self.options.save_cover:
            numpy.save(output + '-cover.npy', self.cover)

        # Save membership
        if self.options.save_membership:
            for layer in range(self.layers):
                clusters = self.cover[layer]
                _clusters = [list(cluster) for cluster in clusters]
                _n = max(max(cluster) + 1 for cluster in _clusters if cluster)
                result = [[] for _ in range(_n)]
                for idx, cluster in enumerate(clusters):
                    for item in cluster:
                        result[item].append(idx)
            # note: at this point result holds only the last layer's membership
            numpy.save(output + '-membership.npy', result)

    def build(self):
        """ Runs the application. """

        # Graph construction
        with self.timing.timeit_context_add('Build BNOC'):
            self.create_vertices_and_communities()
            self.create_cover()
            self.select_overlapping_vertices()
            self.matrices = []
            for index, e in enumerate(self.options.schema):
                matrix = self.create_biadj_matrix(
                    e[0], e[1], self.options.dispersion[index],
                    self.options.mu[index])
                if self.options.noise[index] > 0.0:
                    matrix = self.add_noise(matrix, self.options.noise[index])
                self.matrices.append(matrix)

        # Save
        with self.timing.timeit_context_add('Save'):
            # Save json inf file
            output = self.options.output
            with open(output + '-inf.json', 'w+') as f:
                d = {
                    'output': self.options.output,
                    'directory': self.options.directory,
                    'extension': 'ncol',
                    'vertices': self.options.vertices,
                    'communities': self.options.communities,
                    'x': self.options.x,
                    'z': self.options.z,
                    'p': self.options.p,
                    'balanced': self.options.balanced,
                    'd': self.options.dispersion,
                    'mu': self.options.mu,
                    'noise': self.options.noise,
                    'unweighted': self.options.unweighted,
                    'normalize': self.options.normalize,
                    'conf': self.options.conf,
                    'show_timing': self.options.show_timing,
                    'save_timing_csv': self.options.save_timing_csv,
                    'save_timing_json': self.options.save_timing_json,
                    'unique_key': self.options.unique_key,
                    'edges': 0
                }
                for matrix in self.matrices:
                    d['edges'] += numpy.count_nonzero(matrix)
                json.dump(d, f, indent=4)

            if self.options.output_npy:
                self.save_npy(output)
            if self.options.output_text:
                self.save_text(output)

        if self.options.show_timing:
            self.timing.print_tabular()
        if self.options.save_timing_csv:
            self.timing.save_csv(output + '-timing.csv')
        if self.options.save_timing_json:
            self.timing.save_json(output + '-timing.json')
Example #10
def main():
	"""
	Main entry point for the application when run from the command line.
	"""

	# Timing instantiation
	timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])

	with timing.timeit_context_add('Pre-processing'):

		# Setup parse options command line
		current_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
		parser = args.setup_parser(current_path + '/args/mdr.json')
		options = parser.parse_args()
		args.update_json(options)
		args.check_output(options)

		# Log instantiation
		log = helper.initialize_logger(dir='log', output='log')

		if options.input and options.vertices is None:
			log.warning('Vertices are required when input is given.')
			sys.exit(1)

		# Create default values for optional parameters
		if options.reduction_factor is None:
			options.reduction_factor = 0.5
		if options.max_levels is None:
			options.max_levels = 3
		if options.matching is None:
			options.matching = 'greedy_seed_twohops'
		if options.similarity is None:
			options.similarity = 'weighted_common_neighbors'

		# Validation of matching method
		valid_matching = ['gmb', 'rgmb', 'hem', 'lem', 'rm']
		if options.matching.lower() not in valid_matching:
			log.warning('Matching method is invalid.')
			sys.exit(1)

		# Validation of input extension
		valid_input = ['.arff', '.dat']
		if options.extension not in valid_input:
			log.warning('Input extension is invalid.')
			sys.exit(1)

		# Validation of similarity measure
		valid_similarity = ['common_neighbors', 'weighted_common_neighbors',
		'salton', 'preferential_attachment', 'jaccard', 'adamic_adar',
		'resource_allocation', 'sorensen', 'hub_promoted', 'hub_depressed',
		'leicht_holme_newman', 'weighted_jaccard']
		if options.similarity.lower() not in valid_similarity:
			log.warning('Similarity measure is invalid.')
			sys.exit(1)

		options.vertices = list(map(int, options.vertices))  # a list, not a lazy map: it is indexed later
		options.max_levels = int(options.max_levels)
		options.reduction_factor = float(options.reduction_factor)

	# Load bipartite graph
	with timing.timeit_context_add('Load'):
		if options.extension == '.arff':
			graph = helperigraph.load_csr(options.input)
		elif options.extension == '.dat':
			graph = helperigraph.load_dat(options.input, skip_last_column=options.skip_last_column, skip_rows=options.skip_rows)
		graph['level'] = 0

	# Coarsening
	with timing.timeit_context_add('Coarsening'):
		hierarchy_graphs = []
		hierarchy_levels = []
		while graph['level'] != options.max_levels:

			matching = list(range(graph.vcount()))  # mutable list: the matching methods fill it in place
			levels = graph['level']

			levels += 1
			graph['similarity'] = getattr(Similarity(graph, graph['adjlist']), options.similarity)
			# vertex id range of layer 1 (the second vertex set)
			start = sum(graph['vertices'][0:1])
			end = sum(graph['vertices'][0:1 + 1])
			if options.matching in ['hem', 'lem', 'rm']:
				one_mode_graph = graph.weighted_one_mode_projection(list(range(start, end)))
				matching_method = getattr(one_mode_graph, options.matching)
				matching_method(matching, reduction_factor=options.reduction_factor)
			else:
				matching_method = getattr(graph, options.matching)
				matching_method(range(start, end), matching, reduction_factor=options.reduction_factor)

			coarse = graph.contract(matching)
			coarse['level'] = levels
			graph = coarse
			if options.save_hierarchy or (graph['level'] == options.max_levels):
				hierarchy_graphs.append(graph)
				hierarchy_levels.append(levels)

	# Save
	with timing.timeit_context_add('Save'):

		output = options.output
		for index, (levels, graph) in enumerate(reversed(list(zip(hierarchy_levels, hierarchy_graphs)))):

			if options.save_conf:
				with open(output + '-' + str(index) + '.conf', 'w+') as f:
					d = {}
					d['source_filename'] = options.input
					d['source_v0'] = options.vertices[0]
					d['source_v1'] = options.vertices[1]
					d['source_vertices'] = options.vertices[0] + options.vertices[1]
					d['edges'] = graph.ecount()
					d['vertices'] = graph.vcount()
					d['reduction_factor'] = options.reduction_factor
					d['max_levels'] = options.max_levels
					d['similarity'] = options.similarity
					d['matching'] = options.matching
					d['levels'] = levels
					for layer in range(graph['layers']):
						vcount = str(len(graph.vs.select(type=layer)))
						attr = 'v' + str(layer)
						d[attr] = vcount
					json.dump(d, f, indent=4)

			if options.save_ncol:
				graph.write(output + '-' + str(index) + '.ncol', format='ncol')

			if options.save_source:
				with open(output + '-' + str(index) + '.source', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['source'])) + '\n')

			if options.save_predecessor:
				with open(output + '-' + str(index) + '.predecessor', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['predecessor'])) + '\n')

			if options.save_successor:
				numpy.savetxt(output + '-' + str(index) + '.successor', graph.vs['successor'], fmt='%d')

			if options.save_weight:
				numpy.savetxt(output + '-' + str(index) + '.weight', graph.vs['weight'], fmt='%d')

			if options.save_adjacency:
				numpy.savetxt(output + '-' + str(index) + '.dat', helperigraph.biajcent_matrix(graph), fmt='%.2f')

			if options.save_gml:
				del graph['adjlist']
				del graph['similarity']
				graph['layers'] = str(graph['layers'])
				graph['vertices'] = ','.join(map(str, graph['vertices']))
				graph['level'] = str(graph['level'])
				# plain lists rather than lazy map objects so igraph can store the attributes (Python 3)
				graph.vs['name'] = [str(i) for i in range(graph.vcount())]
				graph.vs['type'] = [str(t) for t in graph.vs['type']]
				graph.vs['weight'] = [str(w) for w in graph.vs['weight']]
				graph.vs['successor'] = [str(s) for s in graph.vs['successor']]
				for v in graph.vs():
					v['source'] = ','.join(map(str, v['source']))
					v['predecessor'] = ','.join(map(str, v['predecessor']))
				graph.write(output + '-' + str(index) + '.gml', format='gml')

			if not options.save_hierarchy:
				break

	if options.show_timing:
		timing.print_tabular()
	if options.save_timing_csv:
		timing.save_csv(output + '-timing.csv')
	if options.save_timing_json:
		timing.save_json(output + '-timing.json')