예제 #1
0
class Connections:
    """Convenience operations for linking entries on behalf of a traverser.

    Holds the traverser passed to the constructor (``self.t``) plus its own
    ``Access`` object (``self.data``) built from the traverser's ``events``.
    """

    def __init__(self, transverse):
        """Store ``transverse`` and create an ``Access`` from its events."""
        self.t = transverse
        self.data = Access(transverse.events)

    def get_terminus(self):
        """Return whatever ``self.data.get_terminus()`` reports."""
        terminus = self.data.get_terminus()
        return terminus

    def set_relationship(self, parent_node, child_url):
        """Link ``parent_node`` to the node for ``child_url``.

        An ``Entry`` is built for ``child_url`` and asked to resolve its
        successor, then looked up / registered through ``is_new_node``.
        The parent's "next" information is updated with that entry before
        the link between the two nodes is added.
        """
        child_entry = Entry(child_url, self.t.reddit)
        child_entry.set_next()
        child_node, _ = self.data.is_new_node(child_entry)
        self.data.update_parent_next(parent_node, child_entry)
        self.data.add_link(parent_node, child_node)

    def create_url_relationship(self, parent_url, child_url):
        """Link the nodes for two URLs, using the traverser's data accessor.

        Unlike ``set_relationship`` this works through ``self.t.data`` rather
        than ``self.data`` and does not call ``set_next`` on either entry.
        """
        graph = self.t.data
        # The parent is resolved before the child entry is even constructed.
        parent_entry = Entry(parent_url, self.t.reddit)
        parent_node, _ = graph.is_new_node(parent_entry)
        child_entry = Entry(child_url, self.t.reddit)
        child_node, _ = graph.is_new_node(child_entry)
        graph.add_link(parent_node, child_node)

    def extend_terminus(self):
        """Resume graph building from every terminus node that has a next id.

        For each node returned by ``get_terminus`` whose ``'next_id'`` is
        truthy, an ``Entry`` is rebuilt from its ``'raw_url'``; when that
        entry resolves a successor, ``build_graph`` is run on the successor.
        """
        for terminal in self.get_terminus():
            if not terminal['next_id']:
                continue
            entry = Entry(terminal['raw_url'], self.t.reddit)
            entry.set_next()
            successor = entry.next_entry
            if successor is None:
                continue
            self.t.build_graph(successor)
예제 #2
0
class Transverse:
    """Follow chains of entries and record them as nodes and links.

    The instance owns a logged-in reddit client, a data accessor, and a
    ``Source`` that supplies entries to start from. ``loop`` repeatedly
    pulls a starting entry, walks its chain with ``build_graph`` and, when
    anything new was recorded, clears the cache and inspects the new nodes.
    """

    def __init__(self, events):
        """Log in to reddit and set up the data accessor and entry source.

        Credentials, ports and the auth key are read from ``Conf()``.

        Args:
            events: Event sink; passed to ``Access`` and notified through
                ``on_clearing_cache`` from ``loop``.
        """
        config = Conf()
        self.reddit = praw.Reddit(user_agent='Switcharoo Cartographer v.0.2.1')
        self.reddit.login(config.r_username, config.r_password, disable_warning=True)
        self.events = events
        self.queue = None  # created lazily by init_queue()
        self.data = Access(self.events)
        self.source = Source(self)
        self._port = config.com_port
        self._share_port = config.share_port
        self._auth = config.auth
        # Checked by loop() before each iteration; set to True to end the loop.
        self._should_stop = False
        # Allowed gap between max_dist and a node's distance in analyze_found().
        self._threshold = 10

    def init_queue(self):
        """Create the ``EntryQueue`` on first use; later calls do nothing."""
        if self.queue is None:
            self.queue = EntryQueue(self)

    def build_graph(self, current_entry):
        """Walk the chain starting at ``current_entry`` and record it.

        Each entry is asked to resolve its successor, registered through
        ``is_new_node``, linked to every parent reported by ``get_parents``
        and marked as searched. The walk ends when the chain runs out or
        when ``is_new_node`` signals (second return value truthy) that the
        entry was already in the graph.

        Args:
            current_entry: First entry of the chain, or ``None``.

        Returns:
            ``(found, found_list)``: ``found`` is True when at least one new
            node or one new link was created; ``found_list`` holds the nodes
            that were reported as new.
        """
        entry_point = True
        stop = False
        found = False
        found_list = []
        while current_entry is not None and not stop:
            current_entry.set_next()
            if current_entry.next_entry is None and entry_point:
                # The starting entry leads nowhere: nothing to record.
                self.source.mark_searched(current_entry)
                return found, found_list
            entry_point = False
            # Check if item is already in graph
            node, stop = self.data.is_new_node(current_entry)
            # New node
            if not stop:
                found = True
                found_list.append(node)
            for parent in self.data.get_parents(current_entry):
                # A freshly created link also counts as a discovery.
                if self.data.add_link(parent, node):
                    found = True
            self.source.mark_searched(current_entry)
            current_entry = current_entry.next_entry
        return found, found_list

    def analyze_found(self, list):
        """Report new nodes that look far from, or detached from, the origin.

        Distance data is fetched from the manager listening on the share
        port. A node is reported when ``max_dist`` minus its distance
        exceeds the threshold, or when it has no distance entry at all.

        Args:
            list: Nodes to inspect (each must expose ``_id``). The name
                shadows the builtin but is kept for keyword callers.
        """
        if not list:
            return
        manager = BaseManager(address=('', self._share_port), authkey=self._auth)
        manager.register('get_meta_data')
        manager.connect()
        distances, max_dist = manager.get_meta_data()
        for n in list:
            # Only the lookup itself is expected to raise KeyError.
            try:
                distance = distances[n._id]
            except KeyError:
                # Do query here to see if node exists. If it does then node
                # does not link to origin
                print("*** Node " + str(n._id) + " may not link to origin")
                continue
            if max_dist - distance > self._threshold:
                # Do something here
                print("*** Node " + str(n._id) + " hit the threshold")

    def loop(self, limit, sleep=10):
        """Process entries from the source until ``_should_stop`` is set.

        Args:
            limit: Forwarded to ``Source.add_to_queue``.
            sleep: Forwarded to ``Source.add_to_queue``.
        """
        # The flag is read once per iteration, so a stop request takes
        # effect after the entry currently being processed.
        while not self._should_stop:
            current_entry = self.source.add_to_queue(limit, sleep)
            found, found_list = self.build_graph(current_entry)
            if found:
                self.events.on_clearing_cache()
                clear_cache(self._port)
                self.analyze_found(found_list)