Example #1
    def test_should_download_zipped_csv(self):
        os = OSFS("./tests/test_integration/resources/")
        file_name = "test_csv_zipped"
        test_zip_file = 'http://localhost:8001/local_data/base_train.zip'

        test_ds_zip = DataSet(os, file_name, "test_id", test_zip_file,
                              "test dataset", "zip")
        test_ds_zip.download()
        test_ds_zip.unzip_file()
        df = pd.read_csv(test_ds_zip.uri)
        self.assertEqual((2, 2), df.shape)
        os.remove(file_name + "/train.csv")
        os.removedir(file_name)

        # plain CSV: download only (unzip_file is expected to be a no-op for non-zip data)
        os = OSFS("./tests/test_integration/resources/")
        file_name = "train.csv"
        test_file = 'http://localhost:8001/local_data/train.csv'

        test_ds = DataSet(os, file_name, "test_id", test_file, "test dataset")
        test_ds.download()
        test_ds.unzip_file()
        df = pd.read_csv(test_ds.uri)
        self.assertEqual((2, 2), df.shape)
        os.remove(file_name)
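To run this test standalone you need roughly the following header; the DataSet import path is project-specific, so treat that line as an assumption:

    import pandas as pd
    from fs.osfs import OSFS
    from dataset_manager import DataSet  # assumed module path; adjust to your project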
Example #2
def create_production(build_dir, backups, script_dir):
    """Promote the staging build to production, hosted at
    register.geostandaarden.nl.
    """

    print("Building production...")
    logging.info("Building production...")

    deploy = OSFS('..')

    if not deploy.exists(backups):
        deploy.makedir(backups)

    deploy.copydir('%s/%s' % (script_dir, build_dir), 'register-new', overwrite=True)

    if deploy.exists('register'):
        # the server refuses to recursively remove register/staging,
        # so we explicitly remove the symbolic link to staging first
        try:
            deploy.remove('register/staging/staging')
        except ResourceNotFoundError:
            print("Warning, register/staging/staging not found...")

        try:
            deploy.removedir('register/staging')
        except ResourceNotFoundError:
            print("Warning, register/staging not found...")
        
        backup_dir = time.strftime('%Y-%m-%d-%H-%M-%S')

        # if deploy.exists('backups/%s' % backup_dir): 
        #     deploy.removedir('backups/%s' % backup_dir, force=True)
        
        deploy.copydir('register', 'backups/%s' % backup_dir, overwrite=True)
        
        try:
            deploy.movedir('register', 'register-old', overwrite=True)
        except ResourceNotFoundError:
            pass

    deploy.movedir('register-new', 'register', overwrite=True)

    # create symbolic link to standalone staging directory
    # fails if production is built first...
    deploy.makedir('register/staging')
    call('cd ../register/staging; ln -s ../../staging', shell=True)
    call('cd ../register; ln -s ../%s/log.txt' % script_dir, shell=True)
    
    try:
        deploy.removedir('register-old', force=True)
    except ResourceNotFoundError:
        pass

    call('chmod -R a+rx ../register', shell=True)

    print "Done building production..."
    logging.info("Production built successfully!")
Example #3
    def test_unzip_local_data(self):
        os = OSFS(".")
        # stub out remove() so any cleanup inside unzip_file() is a no-op
        os_remove = os.remove
        os.remove = mock.Mock(return_value=None)
        os.copy("./tests/resources/local_data/base_train.zip",
                "./tests/resources/local_data/train.zip")
        test_local = DataSet(os, "/local/path", "train",
                             "./tests/resources/local_data/train.zip",
                             "test dataset", "zip")
        test_local.unzip_file()
        result = os.exists("./tests/resources/local_data/train/train.csv")
        # restore the real remove() before cleaning up the extracted files
        os.remove = os_remove
        os.remove("./tests/resources/local_data/train/train.csv")
        os.remove("./tests/resources/local_data/train.zip")
        os.removedir("./tests/resources/local_data/train")
        self.assertTrue(result)
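Swapping the bound method by hand works, but the stub leaks if an assertion fails before it is restored. A sketch of the same stubbing with unittest.mock's context manager, equivalent under the assumptions above:

    with mock.patch.object(os, "remove", return_value=None):
        test_local.unzip_file()
    # remove() is restored automatically here, even if unzip_file() raises
    result = os.exists("./tests/resources/local_data/train/train.csv")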
Example #4
def create_production(destination, backups, script_entry_path, production_path):
    """Promote the staging build to production, hosted at
    register.geostandaarden.nl.
    """

    ## TODO: feed this function absolute paths

    print("Building production...")
    logging.info("Building production...")

    production = OSFS(production_path)

    # if production.exists(backups) == False:
    #     production.makedir(backups)

    # copy newly baked register/staging to production directory
    # NOTE: only build paths within script_dir are currently supported;
    # build_path is assumed to be defined at module scope
    call('cp -r %s %s' % (ospath.join(build_path, destination), ospath.join(production_path, destination + '-new')), shell=True)
    # production.copydir('%s/%s/%s' % (script_dir, build_path, destination), destination + '-new', overwrite=True)

    if production.exists(destination):
        # the server refuses to recursively remove register/staging,
        # so we explicitly remove the symbolic link to staging first
        try:
            production.remove('%s/staging/staging' % destination)
        except ResourceNotFoundError:
            print("Warning, %s/staging/staging not found..." % destination)

        try:
            production.removedir('%s/staging' % destination)
        except ResourceNotFoundError:
            print("Warning, %s/staging not found..." % destination)
        
        backup_dir = time.strftime('%Y-%m-%d-%H-%M-%S')

        # if production.exists('backups/%s' % backup_dir): 
        #     production.removedir('backups/%s' % backup_dir, force=True)
        
        production.copydir(destination, '%s/%s' % (backups, backup_dir), overwrite=True)
        
        try:
            production.movedir(destination, destination + '-old', overwrite=True)
        except ResourceNotFoundError:
            pass

    production.movedir(destination + '-new', destination, overwrite=True)

    # create symbolic link to standalone staging directory
    # fails if production is built first...
    production.makedir('%s/staging' % destination)
    
    call('cd %s; ln -s %s' % (ospath.join(production_path, destination, 'staging'), ospath.join(production_path, 'staging')), shell=True)
    call('cd %s; ln -s %s' % (ospath.join(production_path, destination), ospath.join(script_entry_path, 'log.txt')), shell=True)
    
    try:
        production.removedir(destination + '-old', force=True)
    except ResourceNotFoundError:
        pass

    call('chmod -R a+rx %s/%s' % (production_path, destination), shell=True)

    print "Done building production..."
    logging.info("Production built successfully!")
Example #5
class Timeline(object):
    """A timeline is a sequence of timestamped events."""
    def __init__(self, path, name, max_events=None):
        self.path = path
        self.name = name
        self.fs = OSFS(path, create=True)
        self.max_events = max_events

    def __repr__(self):
        return "Timeline({!r}, {!r}, max_events={!r})".format(
            self.path, self.name, self.max_events)

    def new_event(self, event_type, timestamp=None, *args, **kwargs):
        """Create and return an event, to be used as a context manager"""
        if self.max_events is not None:
            size = len(self.fs.listdir(wildcard="*.json"))
            if size >= self.max_events:
                raise TimelineFullError(
                    "The timeline has reached its maximum size")

        if timestamp is None:
            timestamp = int(time() * 1000.0)
        try:
            event_cls = _event_registry[event_type]
        except KeyError:
            raise UnknownEventError("No event type '{}'".format(event_type))

        # Make an event id that we can be confident is unique
        token = str(randint(0, 2**31))
        event_id = "{}_{}_{}".format(event_type, timestamp, token)
        event = event_cls(self, event_id, timestamp, *args, **kwargs)
        log.debug('new event {!r}'.format(event))
        return event

    def new_photo(self, file, filename=None, ext=None, bytes=None, **kwargs):
        """Create a new photo object; pass either a file (path or file-like) or raw bytes"""
        event = self.new_event('IMAGE', **kwargs)

        if hasattr(file, 'getvalue'):
            bytes = file.getvalue()
        elif file is not None:
            if isinstance(file, basestring):
                with open(file, 'rb') as f:
                    bytes = f.read()
            else:
                bytes = file.read()
        else:
            if bytes is None:
                raise ValueError("A value for 'file' or 'bytes' is required")
        event.attach_bytes(bytes, name='photo', filename=filename, ext=ext)
        return event

    def get_events(self, sort=True):
        """Get all accumulated events"""
        events = []
        for event_filename in self.fs.listdir(wildcard="*.json"):
            with self.fs.open(event_filename, 'rb') as f:
                event = loads(f.read())
                events.append(event)
        if sort:
            # sort by timestamp
            events.sort(key=itemgetter('timestamp'))
        return events

    def clear_all(self):
        """Clear all stored events"""
        for filename in self.fs.listdir(wildcard="*.json"):
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def clear_events(self, event_ids):
        """Clear any events that have been processed"""
        for event_id in event_ids:
            filename = "{}.json".format(event_id)
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def _write_event(self, event_id, event):
        if hasattr(event, 'to_data'):
            event = event.to_data()
        event['event_id'] = event_id
        event_json = dumps(event, indent=4)
        filename = "{}.json".format(event_id)
        with self.fs.open(filename, 'wb') as f:
            f.write(event_json)
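The class leans on several module-level names; a plausible import header (the event registry, exception classes, and logger live elsewhere in the package, so those details are assumptions):

    import logging
    from json import dumps, loads
    from operator import itemgetter
    from random import randint
    from time import time

    from fs.osfs import OSFS

    log = logging.getLogger(__name__)  # assumed logger setup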
Example #6
class Timeline(object):
    """A timeline is a sequence of timestamped events."""

    def __init__(self, path, name, max_events=None):
        self.path = path
        self.name = name
        self.fs = OSFS(path, create=True)
        self.max_events = max_events

    def __repr__(self):
        return "Timeline({!r}, {!r}, max_events={!r})".format(self.path, self.name, self.max_events)

    def new_event(self, event_type, timestamp=None, *args, **kwargs):
        """Create and return an event, to be used as a context manager"""
        if self.max_events is not None:
            size = len(self.fs.listdir(wildcard="*.json"))
            if size >= self.max_events:
                raise TimelineFullError("The timeline has reached its maximum size")

        if timestamp is None:
            timestamp = int(time() * 1000.0)
        try:
            event_cls = _event_registry[event_type]
        except KeyError:
            raise UnknownEventError("No event type '{}'".format(event_type))

        # Make an event id that we can be confident is unique
        token = str(randint(0, 2 ** 31))
        event_id = kwargs.pop('event_id', None) or "{}_{}_{}".format(event_type, timestamp, token)
        event = event_cls(self, event_id, timestamp, *args, **kwargs)
        log.debug('new event {!r}'.format(event))
        return event

    def new_photo(self, file, filename=None, ext=None, bytes=None, **kwargs):
        """Create a new photo object; pass either a file (path or file-like) or raw bytes"""
        event = self.new_event('IMAGE', **kwargs)

        if hasattr(file, 'getvalue'):
            bytes = file.getvalue()
        elif file is not None:
            if isinstance(file, text_type):
                with open(file, 'rb') as f:
                    bytes = f.read()
            else:
                bytes = file.read()
        else:
            if bytes is None:
                raise ValueError("A value for 'file' or 'bytes' is required")
        event.attach_bytes(bytes, name='photo', filename=filename, ext=ext)
        return event

    def get_events(self, sort=True):
        """Get all accumulated events"""
        events = []
        for event_filename in self.fs.listdir(wildcard="*.json"):
            with self.fs.open(event_filename, 'rb') as f:
                event = loads(f.read().decode('utf-8'))
                events.append(event)
        if sort:
            # sort by timestamp
            events.sort(key=itemgetter('timestamp'))
        return events

    def clear_all(self):
        """Clear all stored events"""
        for filename in self.fs.listdir(wildcard="*.json"):
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def clear_events(self, event_ids):
        """Clear any events that have been processed"""
        for event_id in event_ids:
            filename = "{}.json".format(event_id)
            try:
                self.fs.remove(filename)
            except FSError:
                pass

    def _write_event(self, event_id, event):
        if hasattr(event, 'to_data'):
            event = event.to_data()
        event['event_id'] = event_id
        event_json = dumps(event, indent=4).encode('utf-8')
        filename = "{}.json".format(event_id)
        with self.fs.open(filename, 'wb') as f:
            f.write(event_json)
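This variant is the Python 2/3-compatible evolution of Example #5: text_type replaces basestring, the JSON payload is explicitly encoded and decoded as UTF-8, and callers may supply their own event_id. The compatibility name presumably comes from six:

    from six import text_type  # str on Python 3, unicode on Python 2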
Example #7
class TestDatasetManager(unittest.TestCase):

    trash_dir = "./tests/resources/trash_data"

    def setUp(self):
        self.os = OSFS(".")

    def tearDown(self):
        for data in self.os.listdir(self.trash_dir):
            if data != ".keep":
                self.os.remove("{}/{}".format(self.trash_dir, data))
        self.os.close()

    def test_should_read_yaml_from_dir(self):

        expected = {
            "one_test": {
                "source": "http://source/teste",
                "description": "my little dataset"
            }
        }

        data = DatasetManager("./tests/resources/one_data")
        self.assertDictEqual(data.get_datasets(), expected)

    def test_should_read_multiple_yaml_from_dir(self):

        expected = {
            "one_test": {
                "source":
                "https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv",
                "description": "my little dataset"
            },
            "two_test": {
                "source":
                "https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv",
                "description": "my little dataset 2"
            }
        }

        data = DatasetManager("./tests/resources/multiple_data", fs=self.os)
        result = list(data.get_datasets().keys())
        result.sort()
        expected = ["one_test", "two_test"]
        self.assertListEqual(expected, result)

    def test_should_get_dataset(self):

        data = DatasetManager("./tests/resources/local_data")
        dataset = {
            "local_test": {
                "source": "./tests/resources/local_data/train.csv",
                "description": "my little dataset local"
            }
        }
        self.assertDictEqual(data.get_dataset("local_test"),
                             dataset.get("local_test"))

    def test_should_get_dataset_unknown(self):

        data = DatasetManager("./tests/resources/local_data")
        with self.assertRaises(IOError):
            data.get_dataset("unknown_test")

    def test_should_create_dataset(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv",
        }

        data.create_dataset(**dataset)

        loaded_datasets = data.get_datasets()
        dataset_config = loaded_datasets.get(identifier)

        self.assertTrue(
            self.os.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(self.os.listdir(self.trash_dir)), 2)

        self.assertEqual(list(loaded_datasets.keys())[0], identifier)
        self.assertEqual(dataset_config.get("description"),
                         dataset["description"])
        self.assertEqual(dataset_config.get("source"), dataset["source"])

    def test_should_create_dataset_with_custom_data(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name_custom"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv"
        }
        data.create_dataset(**dataset)
        self.assertTrue(
            self.os.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))

        self.assertEqual(len(os.listdir(self.trash_dir)), 2)
        loaded_dataset = data.get_datasets()
        self.assertEqual(list(loaded_dataset.keys()), [identifier])

        datasource_configs = loaded_dataset.get(identifier)
        self.assertEqual(datasource_configs["description"],
                         dataset["description"])
        self.assertEqual(datasource_configs["source"], dataset["source"])

    def test_should_remove_dataset(self):
        data = DatasetManager(self.trash_dir, fs=self.os)
        identifier = "data_name"
        dataset = {
            "identifier": identifier,
            "description": "description",
            "source": "/tmp/test.csv"
        }
        data.create_dataset(**dataset)
        self.assertTrue(
            os.path.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(os.listdir(self.trash_dir)), 2)
        data.remove_dataset(identifier)
        self.assertFalse(
            os.path.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
        self.assertEqual(len(os.listdir(self.trash_dir)), 1)

    def test_should_remove_unknown_dataset(self):

        data = DatasetManager("./tests/resources/local_data", fs=self.os)
        with self.assertRaises(IOError):
            data.remove_dataset("unknown_dataset")
    if "renpy.zip" not in cwdfs.listdir("/"):
        puts("Downloading Ren'Py")
        r = requests.get(
            "https://www.renpy.org/dl/6.99.12.4/renpy-6.99.12.4-sdk.zip",
            stream=True)
        r.raise_for_status()
        with cwdfs.open("renpy.zip", 'wb') as fd:
            total_length = int(r.headers.get('content-length'))
            for chunk in progress.bar(r.iter_content(chunk_size=1024),
                                      expected_size=(total_length / 1024) + 1):
                fd.write(chunk)

    puts("Extracting Ren'Py")
    with ZipFS("./renpy.zip") as zipfs:
        fscopy.copy_dir(zipfs, "renpy-6.99.12.4-sdk", tempfs, "renpy")
    cwdfs.remove("renpy.zip")

puts("ModTemplate setup")

with indent(2):
    if "modtemplate.zip" not in cwdfs.listdir("/"):
        puts("Downloading ModTemplate")
        r = requests.get(
            "https://github.com/Monika-After-Story/DDLCModTemplate/releases/download/v1.1.0/DDLCModTemplate_1.1.0.zip",
            stream=True)
        r.raise_for_status()
        with cwdfs.open("modtemplate.zip", 'wb') as fd:
            total_length = int(r.headers.get('content-length'))
            for chunk in progress.bar(r.iter_content(chunk_size=1024),
                                      expected_size=(total_length / 1024) + 1):
                fd.write(chunk)
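Unlike the earlier examples, this snippet targets the fs 2.x API (copy_dir, ZipFS as a context manager) plus clint for terminal output and requests for streaming downloads. A plausible header, with the filesystem objects as assumptions:

    import requests
    from clint.textui import indent, progress, puts
    from fs import copy as fscopy
    from fs.osfs import OSFS
    from fs.tempfs import TempFS
    from fs.zipfs import ZipFS

    cwdfs = OSFS(".")  # assumed: filesystem rooted at the working directory
    tempfs = TempFS()  # assumed: scratch filesystem receiving the extracted SDK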