Ejemplo n.º 1
0
 def set_remote_instance(self, remote_inst):
     """Store the remote instance ID and check it against the local one.

     A truthy remote_inst is resolved with get_instance_id on this
     object's cursor; a falsy one is recorded as the empty string.

     Raises:
         Exception: when a local instance is set and the remote ID
             equals it, or when neither a local nor a remote instance
             is set.
     """
     if remote_inst:
         self.remote_inst_id = get_instance_id(self.cur, remote_inst)
     else:
         self.remote_inst_id = ""

     local_id = self.inst_id
     if not local_id:
         # without a local instance, a remote one is mandatory
         if not self.remote_inst_id:
             raise Exception("Neither local nor remote instance is set")
         return

     if local_id == self.remote_inst_id:
         raise Exception("Remote instance same as local")
Ejemplo n.º 2
0
 def __init__(self, cur):
     """Initialise both base classes and read the server options.

     cur is handed to CursorWrapper and is also used to resolve the
     instance ID.
     """
     VolumeHolder.__init__(self)
     CursorWrapper.__init__(self, cur)

     # the "instance" option is looked up first, then resolved to an ID
     self.inst_id = get_instance_id(cur, get_option("instance", None))

     # remaining settings are stored exactly as get_option returns them
     self.alt_repre = get_option("storage_alternative", None)
     self.mime_type = get_option("server_mime_type", None)
     self.check_locality = get_option("server_check_locality", True)
Ejemplo n.º 3
0
    def do_add_instance(self):
        self.cur.execute(
            """insert into instances(instance_name)
values(%s)
on conflict do nothing
returning id""", (self.inst_name, ))
        row = self.cur.fetchone()
        self.inst_id = row[0] if row else get_instance_id(
            self.cur, self.inst_name)
Ejemplo n.º 4
0
    def __init__(self, cur):
        """Store the cursor, reset volume state and read volume options."""
        self.cur = cur

        # volume bookkeeping starts out empty
        self.volume_id = None
        self.member_count = 0
        self.zip_front = None
        self.zip_back = None

        # option fallbacks are passed as strings and the results
        # converted to int
        fallback_threshold = str(1024 * 1024 * 1024)
        self.volume_threshold = int(
            get_option('volume_threshold', fallback_threshold))
        self.compress_backoff = int(get_option('compress_backoff', "3600"))

        self.inst_id = get_instance_id(cur, get_option("instance", None))
Ejemplo n.º 5
0
    def __init__(self, single_action, conn, cur):
        VolumeHolder.__init__(self)
        HostCheck.__init__(self, cur)

        inst_name = get_option("instance", None)
        self.instance_id = get_instance_id(
            cur, inst_name)  # self.inst_id already used by HostCheck

        self.mem_cache = MemCache(
            int(get_option('parse_cache_high_mark', "2000")),
            int(get_option('parse_cache_low_mark', "1000")))

        if get_option('download_preference', 'novelty') == 'novelty':
            self.preference = NoveltyPreference(
                int(get_option('novelty_high_mark', "20000")),
                int(get_option('novelty_low_mark', "15000")))
        else:
            self.preference = BreathPreference()

        self.single_action = single_action
        self.conn = conn
        self.notification_threshold = int(
            get_option('parse_notification_threshold', "1000"))

        page_limit = get_option("page_limit", None)
        self.page_limit = int(page_limit) if page_limit else None
        self.page_count = 0

        self.max_url_len = int(get_option("max_url_len", "512"))

        # ignore case flag would be better dynamic, but Python 3.5.2
        # doesn't support that...
        url_blacklist_rx = get_option("url_blacklist_rx",
                                      "[.](?:jpe?g|pdf|png)$")
        self.url_blacklist_rx = re.compile(url_blacklist_rx,
                                           re.I) if url_blacklist_rx else None

        url_whitelist_rx = get_option("url_whitelist_rx", None)
        self.url_whitelist_rx = re.compile(url_whitelist_rx,
                                           re.I) if url_whitelist_rx else None

        self.comp_param = True if get_option("comp_param", True) else False
        if self.comp_param:
            self.cur.execute("""select nameval
from param_blacklist
order by nameval""")
            rows = self.cur.fetchall()
            self.param_blacklist = set((row[0] for row in rows))
Ejemplo n.º 6
0
    def __init__(self, cur, inst_name=None):
        """Initialise connection limits, work queues and the server endpoint.

        Args:
            cur: cursor passed to CursorWrapper and used to resolve the
                instance ID.
            inst_name: optional instance name. When falsy (the default),
                the "instance" option is used instead. Previously this
                argument was overwritten unconditionally and so had no
                effect.
        """
        CursorWrapper.__init__(self, cur)
        self.own_max_num_conn = int(get_option('own_max_num_conn', "4"))
        self.healthcheck_interval = int(get_option("healthcheck_interval", "100"))
        self.healthcheck_threshold = int(get_option("healthcheck_threshold", "80"))
        if self.healthcheck_threshold <= 0:
            # a non-positive threshold disables the healthcheck
            self.healthcheck_interval = 0

        self.target_queue = [] # of TargetBase descendants
        self.progressing = [] # of URL IDs
        self.volume_progressing = [] # of volume IDs
        self.total_checked = 0
        self.total_processed = 0
        self.total_error = 0

        server_name = get_mandatory_option('server_name')
        raw_port = get_option('server_port', "8888")
        self.endpoint_root = "http://%s:%d" % (server_name, int(raw_port))

        # honour an explicitly passed instance name; fall back to the
        # configured one only when the caller gave none
        if not inst_name:
            inst_name = get_option("instance", None)
        self.inst_id = get_instance_id(cur, inst_name)
        self.remote_inst_id = None # None => not initialized, "" => remote doesn't have instance
Ejemplo n.º 7
0
 def __init__(self, cur):
     """Initialise CursorWrapper and resolve the instance ID."""
     CursorWrapper.__init__(self, cur)

     # the "instance" option is read first, then resolved via the cursor
     self.inst_id = get_instance_id(cur, get_option("instance", None))
Ejemplo n.º 8
0
    def __init__(self, cur):
        """Keep the cursor and resolve the instance ID."""
        # no CursorWrapper methods needed here, so the cursor is stored directly
        self.cur = cur

        self.inst_id = get_instance_id(cur, get_option("instance", None))