예제 #1
0
    def list(self, marker):
        """Yield one ``task.Task`` per usable line of the URL list file.

        Each line of ``self._url_list_file`` is split on whitespace and holds
        either ``<url>`` or ``<check_value> <url>``; any further fields are
        ignored. A HEAD request is issued for every URL to learn the object
        size before the task is yielded.

        Blank lines, URLs without a path component, URLs whose HEAD request
        raises, and lines that fail for any other reason are logged and
        skipped; none of them aborts the iteration.

        Args:
            marker: Not used by this implementation; kept so the call
                signature stays unchanged for existing callers.

        Yields:
            ``task.Task(store_path, file_size, url, check_value)`` where
            ``store_path`` is the URL path without its first character,
            ``file_size`` is the raw ``content-length`` header value of a
            200 response (``None`` when the status is not 200 or the header
            is absent) and ``check_value`` is the first field of a
            multi-field line (``None`` for a single-field line).
        """
        with open(self._url_list_file, 'r') as f:
            for line in f:
                try:
                    field = line.split()
                    if not field:
                        logger.warning("{} is invalid".format(line))
                        continue

                    if len(field) == 1:
                        check_value = None
                        url_path = field[0]
                    else:
                        check_value = field[0].strip()
                        url_path = field[1]

                    ret = urlparse.urlparse(url_path)
                    if ret.path == '':
                        logger.warning("{} is invalid, No path".format(line))
                        continue

                    # Use HEAD to get the object size without downloading it.
                    file_size = None
                    try:
                        response = requests.head(url_path, timeout=5)
                        if response.status_code == 200:
                            # A 200 response may omit Content-Length (e.g.
                            # chunked transfer). Treat that like a non-200
                            # response -- size unknown -- instead of raising
                            # KeyError and dropping the URL.
                            file_size = response.headers.get('content-length')
                    except Exception as e:
                        logger.exception("HEAD object failed with " + str(e))
                        continue

                    logger.info("yield new object: {}".format(str({'store_path': ret.path.strip(), 'url_path': url_path.strip()})))
                    # [1:] drops the first character of the path (the leading
                    # '/' of an absolute URL path).
                    yield task.Task(ret.path.strip()[1:], file_size, url_path.strip(), check_value)

                except Exception:
                    # Best effort: one bad line must not stop the listing.
                    logger.warning("{} is invalid".format(line))
예제 #2
0
    def list(self):
        """Yield one ``task.Task`` per usable line of the URL list file.

        Each line of ``self._url_list_file`` is tab-separated and holds either
        ``<url>`` or ``<check_value>\\t<url>``; any further fields are ignored.

        Blank lines, URLs without a path component, and lines that fail for
        any other reason are logged and skipped; none of them aborts the
        iteration.

        Yields:
            ``task.Task(store_path, check_value, url)`` where ``store_path``
            is the URL path without its first character, ``check_value`` is
            the stripped first field of a multi-field line (``None`` for a
            single-field line) and ``url`` is the URL with surrounding
            whitespace removed.
        """
        with open(self._url_list_file, 'r') as f:
            for line in f:
                try:
                    # str.split('\t') never returns an empty list, so blank
                    # lines have to be detected explicitly.
                    if not line.strip():
                        logger.warning("{} is invalid".format(line))
                        continue

                    field = line.split('\t')
                    if len(field) == 1:
                        check_value = None
                        url_path = field[0]
                    else:
                        check_value = field[0].strip()
                        url_path = field[1]

                    # Strip before parsing so the trailing newline kept by
                    # split('\t') cannot end up inside a URL component.
                    url_path = url_path.strip()
                    ret = urlparse.urlparse(url_path)
                    store_path = ret.path.strip()
                    if store_path == '':
                        logger.warning("{} is invalid, No path".format(line))
                        # Skip the line: without a path there is nothing to
                        # name the stored object after.
                        continue

                    logger.info("yield new object: {}".format(
                        str({
                            'store_path': store_path,
                            'url_path': url_path
                        })))
                    # [1:] drops the first character of the path (the leading
                    # '/' of an absolute URL path).
                    yield task.Task(store_path[1:], check_value, url_path)

                except Exception:
                    # Best effort: one bad line must not stop the listing.
                    logger.warning("{} is invalid".format(line))