Esempio n. 1
0
    def __init__(self, account_id, application_key, bucket_id,
                 enable_hashfiles, temp_folder, use_disk):
        """Authorize against B2 and initialize filesystem state.

        Parameters:
            account_id, application_key: B2 credentials used for
                `authorize_account` against the 'production' realm.
            bucket_id: bucket wrapped by the CachedBucket helper.
            enable_hashfiles: flag stored as-is for later use.
            temp_folder: scratch directory, only used when use_disk is true.
            use_disk: buffer open files on disk (B2FileDisk) instead of
                in memory (B2SequentialFileMemory).

        Raises:
            SystemExit: when use_disk is set and temp_folder already
                exists, to avoid clobbering another instance's scratch.
        """
        account_info = InMemoryAccountInfo()
        self.api = B2Api(account_info)
        self.api.authorize_account('production', account_id, application_key)
        self.bucket_api = CachedBucket(self.api, bucket_id)

        self.logger = logging.getLogger("%s.%s" %
                                        (__name__, self.__class__.__name__))

        self.enable_hashfiles = enable_hashfiles
        self.temp_folder = temp_folder
        self.use_disk = use_disk

        if self.use_disk:
            if os.path.exists(self.temp_folder):
                self.logger.error("Temporary folder exists, exiting")
                # Raise SystemExit directly rather than calling the
                # site-provided exit() helper, which is not guaranteed to
                # exist when the interpreter runs without the site module
                # (python -S). Behavior for callers is identical.
                raise SystemExit(1)

            os.makedirs(self.temp_folder)
            self.B2File = B2FileDisk
        else:
            self.B2File = B2SequentialFileMemory

        self._directories = DirectoryStructure()
        self.local_directories = []

        # NOTE(review): defaultdict invokes its factory with no arguments;
        # this assumes B2FileDisk / B2SequentialFileMemory are zero-arg
        # constructible — confirm, otherwise missing-key access raises here.
        self.open_files = defaultdict(self.B2File)

        # Next file descriptor to hand out.
        self.fd = 0
Esempio n. 2
0
 def setUp(self):
     """Build an in-memory B2 fixture: a simulator-backed API client
     authorized against a stub account, plus one public test bucket."""
     simulator = RawSimulator()
     account_info = StubAccountInfo()
     api = B2Api(account_info, raw_api=simulator)
     api.authorize_account('production', 'my-account', 'good-app-key')

     self.bucket_name = 'my-bucket'
     self.simulator = simulator
     self.account_info = account_info
     self.api = api
     self.bucket = api.create_bucket('my-bucket', 'allPublic')
Esempio n. 3
0
    def __init__(self,
                 bucket,
                 path,
                 account_id,
                 app_key,
                 workers=10,
                 compare_method='mtime'):
        """Set up the receiver: authorize the B2 API, resolve the target
        bucket, open the local state database, and select the predicate
        used to decide whether a file must be uploaded.
        """
        super(B2Reciever, self).__init__()
        self.log = logging.getLogger("B2Reciever")
        self.bucket_name = bucket
        self.path = path.lstrip('/')
        self.account_id = account_id
        self.app_key = app_key

        api = B2Api(max_upload_workers=workers)
        api.authorize_account('production', self.account_id, self.app_key)
        self.api = api
        self.bucket = api.get_bucket_by_name(self.bucket_name)

        self.db = None
        self._db_setup()

        # The receiver is responsible to determining if a file needs to be uploaded or not
        dispatch = {
            "mtime": self._should_transfer_mtime,
            "size": self._should_transfer_size,
        }
        self.should_transfer = dispatch[compare_method]
Esempio n. 4
0
 def __init__(self, account_id, application_key, bucket_id, db_file):
     """Authorize a B2 API client and open the bucket and file-info store.

     Raises:
         ConnectionError: when B2 authorization fails, chained to the
             underlying B2ConnectionError.
     """
     try:
         account_info = InMemoryAccountInfo()
         self.api = B2Api(account_info)
         self.api.authorize_account("production", account_id,
                                    application_key)
     except B2ConnectionError as e:
         print(e)
         # Chain the original exception so the B2 failure details stay
         # in the traceback (a bare `raise ConnectionError` drops them).
         raise ConnectionError from e
     self.bucket_api = Bucket(self.api, bucket_id)
     self.file_info_store = FileInfoStore(db_file)
 def setUp(self):
     """Create a simulated B2 account and an authorized client with one
     public bucket, caching the ids and tokens the tests rely on."""
     simulator = RawSimulator()
     account_info = StubAccountInfo()
     api = B2Api(account_info, raw_api=simulator)
     self.account_id, self.master_key = simulator.create_account()
     api.authorize_account('production', self.account_id, self.master_key)

     self.bucket_name = 'my-bucket'
     self.simulator = simulator
     self.account_info = account_info
     self.api = api
     self.api_url = account_info.get_api_url()
     self.account_auth_token = account_info.get_account_auth_token()
     self.bucket = api.create_bucket('my-bucket', 'allPublic')
     self.bucket_id = self.bucket.id_
Esempio n. 6
0
    def __init__(self, uri):
        """Parse a b2:// URI, authorize against B2, and open the bucket.

        Parameters:
            uri: target of the form b2://<bucket>/<prefix>.

        Raises:
            CloseSpider: on any failure (malformed URI, authorization
                error, missing bucket) so the spider shuts down cleanly.
        """
        try:
            # Validate explicitly instead of `assert`, which is stripped
            # under `python -O` and would let a malformed URI through.
            if not uri.startswith('b2://'):
                raise ValueError("URI must start with b2://: %r" % uri)
            self.bucket, self.prefix = uri[5:].split('/', 1)

            self.api = B2Api()
            self.api.authorize_account('production', self.B2_ACCOUNT_ID,
                                       self.B2_APPLICATION_KEY)
            self.c = self._get_b2_bucket()
        # AssertionError is already an Exception subclass, so the original
        # `except (AssertionError, Exception)` was redundant.
        except Exception as e:
            logger.exception(e)
            raise CloseSpider('could not initialize B2')
Esempio n. 7
0
def main(api_key, project_id, spider_name, b2_account_id, b2_application_key,
         b2_path, delete):
    """Export finished Scrapinghub jobs for the given spiders as gzipped
    JSON-lines files into a B2 bucket, optionally deleting each job after
    its items have been uploaded.
    """
    bucket_name, root = split_bucket_prefix(b2_path)
    # The bucket is resolved lazily, only once something needs uploading.
    bucket = None

    client = ScrapinghubClient(api_key)
    project = client.get_project(project_id)
    for name in spider_name:
        spider = project.spiders.get(name)
        finished_jobs = spider.jobs.list(state='finished')
        # Only jobs that actually produced items are worth exporting.
        keys = [job['key'] for job in finished_jobs
                if 'items' in job and job['items'] > 0]
        if not keys:
            continue

        if bucket is None:
            b2_api = B2Api()
            b2_api.authorize_account('production', b2_account_id,
                                     b2_application_key)
            bucket = b2_api.get_bucket_by_name(bucket_name)

        for key in keys:
            job = spider.jobs.get(key)
            if not job:
                continue

            buffer = io.BytesIO()
            with gzip.GzipFile(fileobj=buffer, mode='w') as archive:
                for item in job.items.iter():
                    archive.write((json.dumps(item) + '\n').encode('utf8'))
            file_name = os.path.join(root, name,
                                     key.replace('/', '-') + '.jl.gz')
            upload_file(bucket, file_name, buffer.getvalue())

            if delete:
                job.delete()
                logging.warning('job {} deleted'.format(key))
Esempio n. 8
0
 def __init__(self, bucket_id):
     """Open a B2 API client backed by the on-disk SQLite credential
     store and bind it to the named bucket.

     NOTE(review): despite its name, `bucket_id` is resolved by *name*
     via get_bucket_by_name — confirm against callers.
     """
     account_info = SqliteAccountInfo()
     raw = B2RawApi(B2Http())
     self.api = B2Api(account_info, AuthInfoCache(account_info), raw_api=raw)
     self.bucket = self.api.get_bucket_by_name(bucket_id)
Esempio n. 9
0
 def setUp(self):
     """Wire a B2Api to stub account info, an in-memory cache, and the
     raw-API simulator for unit tests."""
     account_info = StubAccountInfo()
     cache = InMemoryCache()
     raw_api = RawSimulator()

     self.account_info = account_info
     self.cache = cache
     self.raw_api = raw_api
     self.b2_api = B2Api(account_info, cache, raw_api)
Esempio n. 10
0
 def setUp(self):
     """Create a simulated B2 account plus an API client that talks to
     the RawSimulator through a DummyCache."""
     account_info = InMemoryAccountInfo()
     cache = DummyCache()
     raw_api = RawSimulator()

     self.account_info = account_info
     self.cache = cache
     self.raw_api = raw_api
     self.api = B2Api(account_info, cache, raw_api)
     self.account_id, self.master_key = raw_api.create_account()
Esempio n. 11
0
def list_target_files(config):
    """Return [(file_name, size), ...] for the backup files stored at the
    target configured in config["target"], dispatching on the URL scheme.

    Supported schemes: file, rsync, s3, b2.

    Returns:
        A list of (name, size) tuples, or the string "invalid target"
        when the target URL cannot be parsed.

    Raises:
        ValueError: on connection/authentication failures (with a
            human-readable reason) or an unrecognized scheme.
    """
    import urllib.parse
    try:
        target = urllib.parse.urlparse(config["target"])
    except ValueError:
        return "invalid target"

    if target.scheme == "file":
        return _list_file_target(target)
    elif target.scheme == "rsync":
        return _list_rsync_target(config, target)
    elif target.scheme == "s3":
        return _list_s3_target(config, target)
    elif target.scheme == "b2":
        return _list_b2_target(target)
    else:
        raise ValueError(config["target"])


def _list_file_target(target):
    """List (name, size) for every entry directly inside the local path."""
    return [(fn, os.path.getsize(os.path.join(target.path, fn)))
            for fn in os.listdir(target.path)]


def _list_rsync_target(config, target):
    """List remote files via `rsync --list-only`; on failure raise
    ValueError with a diagnosis of the most common rsync errors."""
    # Two capture groups: size (with thousands separators) and file name.
    rsync_fn_size_re = re.compile(r'.*    ([^ ]*) [^ ]* [^ ]* (.*)')

    # Normalize to a relative path with a trailing slash, as rsync expects.
    target_path = target.path
    if not target_path.endswith('/'):
        target_path = target_path + '/'
    if target_path.startswith('/'):
        target_path = target_path[1:]

    rsync_command = [
        'rsync', '-e',
        rsync_ssh_options(config["target_rsync_port"], direct=True),
        '--list-only', '-r',
        '{host}:{path}'.format(host=target.netloc, path=target_path)
    ]

    code, listing = shell('check_output',
                          rsync_command,
                          trap=True,
                          capture_stderr=True)
    if code == 0:
        ret = []
        for line in listing.split('\n'):
            match = rsync_fn_size_re.match(line)
            if match:
                ret.append((match.groups()[1],
                            int(match.groups()[0].replace(',', ''))))
        return ret

    # Map the most common rsync failure messages to actionable advice.
    if 'Permission denied (publickey).' in listing:
        reason = "Invalid user or check you correctly copied the SSH key."
    elif 'No such file or directory' in listing:
        reason = "Provided path {} is invalid.".format(target_path)
    elif 'Network is unreachable' in listing:
        reason = "The IP address {} is unreachable.".format(target.hostname)
    elif 'Could not resolve hostname' in listing:
        reason = "The hostname {} cannot be resolved.".format(target.hostname)
    else:
        reason = "Unknown error. " \
          "Please check running 'management/backup.py --verify' " \
          "from mailinabox sources to debug the issue."
    raise ValueError("Connection to rsync host failed: {}".format(reason))


def _list_s3_target(config, target):
    """List (key, size) for every key under the configured S3 prefix."""
    # match to a Region
    fix_boto()  # must call prior to importing boto
    import boto.s3
    from boto.exception import BotoServerError
    custom_region = False
    for region in boto.s3.regions():
        if region.endpoint == target.hostname:
            break
    else:
        # If region is not found this is a custom region
        custom_region = True

    bucket = target.path[1:].split('/')[0]
    path = '/'.join(target.path[1:].split('/')[1:]) + '/'

    # Create a custom region with custom endpoint
    if custom_region:
        from boto.s3.connection import S3Connection
        region = boto.s3.S3RegionInfo(name=bucket,
                                      endpoint=target.hostname,
                                      connection_cls=S3Connection)

    # If no prefix is specified, set the path to '', otherwise boto won't list the files
    if path == '/':
        path = ''

    if bucket == "":
        raise ValueError("Enter an S3 bucket name.")

    # connect to the region & bucket
    try:
        conn = region.connect(aws_access_key_id=config["target_user"],
                              aws_secret_access_key=config["target_pass"])
        bucket = conn.get_bucket(bucket)
    except BotoServerError as e:
        if e.status == 403:
            raise ValueError("Invalid S3 access key or secret access key.")
        elif e.status == 404:
            raise ValueError("Invalid S3 bucket name.")
        elif e.status == 301:
            raise ValueError("Incorrect region for this bucket.")
        raise ValueError(e.reason)

    return [(key.name[len(path):], key.size)
            for key in bucket.list(prefix=path)]


def _list_b2_target(target):
    """List (file_name, size) for every file in the configured B2 bucket.

    Credentials are embedded in the URL netloc as keyid:key@bucket.
    """
    if get_os_code() == "Debian10":
        # WARNING: This is deprecated code using a legacy library.
        # We need it because Debian 10 ships with an old version of Duplicity
        from b2.account_info import InMemoryAccountInfo
        from b2.api import B2Api
        from b2.exception import NonExistentBucket
    else:
        from b2sdk.v1 import InMemoryAccountInfo, B2Api
        from b2sdk.v1.exception import NonExistentBucket

    info = InMemoryAccountInfo()
    b2_api = B2Api(info)

    # Extract information from target
    b2_application_keyid = target.netloc[:target.netloc.index(':')]
    b2_application_key = target.netloc[target.netloc.index(':') +
                                       1:target.netloc.index('@')]
    b2_bucket = target.netloc[target.netloc.index('@') + 1:]

    try:
        b2_api.authorize_account("production", b2_application_keyid,
                                 b2_application_key)
        bucket = b2_api.get_bucket_by_name(b2_bucket)
    except NonExistentBucket as e:
        # Chain so the original B2 error stays in the traceback.
        raise ValueError(
            "B2 Bucket does not exist. Please double check your information!"
        ) from e
    return [(key.file_name, key.size) for key, _ in bucket.ls()]