Example 1
    def GET(self):
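        # web.py handler: refuse to touch the library while Traktor is running;
        # otherwise deduplicate and return the cleaner's result as JSON.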
        if librarian.is_traktor_running():
            response = {"status": "error", "message": "Please quit Traktor first."}
        else:
            cleaner = Cleaner(Library.instance())
            cleaner.remove_duplicates()
            logger.debug(u"Duplicate removal complete")

            response = cleaner.get_result()
            response["status"] = "ok"
        web.header("Cache-Control", "no-cache")
        return json.dumps(response)
Example 2
    def GET(self):
        if librarian.is_traktor_running():
            response = {"status": "error", "message": "Please quit Traktor first."}
        else:
            cleaner = Cleaner(Library.instance())
            cleaner.remove_duplicates()
            logger.debug(u"Duplicate removal complete")

            response = cleaner.get_result()
            response["status"] = "ok"
        web.header("Cache-Control", "no-cache")
        return json.dumps(response)
Example 3
def main():
    parser = argparse.ArgumentParser(description="OpsGenie Alert Classifier")
    parser.add_argument('file', type=lambda x: is_valid_file(parser, x),
                        metavar='FILE', help='file to work with')
    parser.add_argument("--clean", nargs='+', dest="clean",
                        help="create a 'clean' file with whitelisted columns a raw file")
    parser.add_argument("--remove", nargs='+', dest="remove",
                        help="Match rows to remove based the 'Message' column")
    parser.add_argument("--count", nargs='?', dest="count", default=None, const=None,
                        help="count of alerts grouped by specified column name")
    parser.add_argument("--fuzzy-count", nargs='?', dest="fuzzy_count", default=None, const=None,
                        help="fuzzy count alerts grouped by specified column name")
    parser.add_argument("--limit", nargs='?', dest="limit", default=20, const=20, type=int,
                        help="limit number of results returned (default: 20)")
    parser.add_argument("--interval", nargs='+', dest="interval",
                        help="Time interval in hours to filter alerts")
    parser.add_argument("--match", nargs='?', dest="match", default=None, const=None,
                        help="Regex match against specified column name for count")
    parser.add_argument("--update-minutes", nargs='?', dest="update_minutes", default=None, const=None,
                        help="Number of minutes between 'CreatedAt' and 'UpdatedAt'")
    parser.add_argument("--outfile", nargs='?', dest="outfile", default=None, const=None,
                        help="Optional file to output results of count")
    parser.add_argument("--threshold", nargs='?', dest="threshold", default=90, const=90, type=int,
                        help="Threshold for alert fuzzy match (default: 100 - so 100% match)")
    parser.add_argument("--remove-numbers", nargs='?', dest="remove_numbers", default=False, const=None, type=bool,
                        help="Remove numbers from alias before doing fuzzy matching (default: False). \
                        To be used in conjuction with the fuzzy threshold flag")
    parser.add_argument('--alias-strip-list', type=lambda x: is_valid_file(parser, x),
                        dest='strip_file', help='csv file with a column of values to strip', metavar="FILE")
    args = parser.parse_args()

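    # Exactly one mode runs per invocation: --clean, then --count, then --fuzzy-count.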
    if args.clean:
        if not args.file.endswith("raw.csv"):
            parser.error("The file {} does not end with 'raw.csv'".format(args.file))
        Cleaner.clean(args.file, args.clean, args.remove)
    elif args.count:
        counter = Counter()
        counter.count(file=args.file, column=args.count, limit=args.limit, interval=args.interval,
                      match=args.match, update_minutes=args.update_minutes, outfile=args.outfile)
    elif args.fuzzy_count:
        fuzzy_counter = FuzzyCounter()
        fuzzy_counter.count(file=args.file, column=args.fuzzy_count, limit=args.limit, threshold=args.threshold, 
                            remove_numbers=args.remove_numbers, outfile=args.outfile,
                            alias_strip_list=args.strip_file)
Example 4
def redact(png_path):
    """
    Takes in a path png_path to a path of an image file, and overwrites it as a cleaned image file
    """
    file = Image.open(png_path)
    imarray = np.array(file)
    clean_array = Cleaner.find_redactions(imarray)
    file = Image.fromarray(clean_array)
    file.save(png_path, 'PNG')
    return
Example 5
def main():
    try:
        lib = Library(conf.library_dir)
        logger.debug("Starting")

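        # "clean" action: deduplicate the library; a test run reports but writes nothing back.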
        if conf.action == "clean":
            cleaner = Cleaner(lib)
            print("Removing duplicates..."),
            cleaner.remove_duplicates()
            print("DONE")

            cleaner.report()

            if not conf.test:
                lib.flush()
                print("\nTraktor library updated.")
            else:
                print("\nTest run. No changes made to the library.")
        elif conf.action == "export":
            exporter = Exporter(lib, conf.export_dir)
            exporter.export()

    except Exception as e:
        logger.error(e, exc_info=False)
Example 6
def main():
    try:
        lib = Library(conf.library_dir)

        if conf.action == "clean":
            cleaner = Cleaner(lib)
            print("Removing duplicates..."),
            cleaner.remove_duplicates()
            print("DONE")

            cleaner.report()

            if not conf.test:
                lib.flush()
                print("\nTraktor library updated.")
            else:
                print("\nTest run. No changes made to the library.")
        elif conf.action == "export":
            exporter = Exporter(lib, conf.export_dir)
            exporter.export()

    except Exception as e:
        logger.error(e, exc_info=False)
Example 7
    def run(settings):
        """
        Executes a single run where certain datasets might or might not be snapshotted
        """

        now = datetime.now()
        yda = datetime.now() - timedelta(1)
        today = '{0:04d}{1:02d}{2:02d}'.format(now.year, now.month, now.day)
        yesterday = '{0:04d}{1:02d}{2:02d}'.format(yda.year, yda.month,
                                                   yda.day)

        snapshots = ZFS.get_snapshots()
        datasets = ZFS.get_datasets()
        for dataset in datasets:
            if dataset in settings:
                try:
                    dataset_settings = settings[dataset]
                    local_snapshots = snapshots.get(dataset, [])

                    take_snapshot = dataset_settings['snapshot'] is True
                    replicate = dataset_settings['replicate'] is not None

                    # Decide whether we need to handle this dataset
                    execute = False
                    if take_snapshot is True or replicate is True:
                        if dataset_settings['time'] == 'trigger':
                            # We wait until we find a trigger file in the filesystem
                            trigger_filename = '{0}/.trigger'.format(
                                dataset_settings['mountpoint'])
                            if os.path.exists(trigger_filename):
                                Manager.logger.info(
                                    'Trigger found on {0}'.format(dataset))
                                os.remove(trigger_filename)
                                execute = True
                        else:
                            trigger_time = dataset_settings['time'].split(':')
                            hour = int(trigger_time[0])
                            minutes = int(trigger_time[1])
                            if (now.hour > hour or
                                (now.hour == hour and now.minute >= minutes)
                                ) and today not in local_snapshots:
                                Manager.logger.info(
                                    'Time passed for {0}'.format(dataset))
                                execute = True

                    if execute is True:
                        # Pre-execution command
                        if dataset_settings['preexec'] is not None:
                            Helper.run_command(dataset_settings['preexec'],
                                               '/')

                        if take_snapshot is True:
                            # Take today's snapshot
                            Manager.logger.info(
                                'Taking snapshot {0}@{1}'.format(
                                    dataset, today))
                            ZFS.snapshot(dataset, today)
                            local_snapshots.append(today)
                            Manager.logger.info(
                                'Taking snapshot {0}@{1} complete'.format(
                                    dataset, today))

                        # Replicating, if required
                        if replicate is True:
                            Manager.logger.info(
                                'Replicating {0}'.format(dataset))
                            replicate_settings = dataset_settings['replicate']
                            push = replicate_settings['target'] is not None
                            remote_dataset = replicate_settings[
                                'target'] if push else replicate_settings[
                                    'source']
                            remote_snapshots = ZFS.get_snapshots(
                                remote_dataset, replicate_settings['endpoint'])
                            last_common_snapshot = None
                            if remote_dataset in remote_snapshots:
                                if push is True:  # If pushing, we search for the last local snapshot that is remotely available
                                    for snapshot in local_snapshots:
                                        if snapshot in remote_snapshots[
                                                remote_dataset]:
                                            last_common_snapshot = snapshot
                                else:  # Else, we search for the last remote snapshot that is locally available
                                    for snapshot in remote_snapshots[
                                            remote_dataset]:
                                        if snapshot in local_snapshots:
                                            last_common_snapshot = snapshot
                            if last_common_snapshot is not None:  # There's a common snapshot
                                previous_snapshot = None
                                if push is True:
                                    for snapshot in local_snapshots:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a snapshot on this host that is not yet on the other side.
                                            size = ZFS.get_size(
                                                dataset, previous_snapshot,
                                                snapshot)
                                            Manager.logger.info(
                                                '  {0}@{1} > {0}@{2} ({3})'.
                                                format(dataset,
                                                       previous_snapshot,
                                                       snapshot, size))
                                            ZFS.replicate(
                                                dataset,
                                                previous_snapshot,
                                                snapshot,
                                                remote_dataset,
                                                replicate_settings.get(
                                                    'buffer_size',
                                                    BUFFER_SIZE),
                                                replicate_settings['endpoint'],
                                                direction='push',
                                                compression=replicate_settings[
                                                    'compression'])
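                                            # Hold the new snapshot on both sides and
                                            # release the previous one so it can later
                                            # be cleaned up.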
                                            ZFS.hold(dataset, snapshot)
                                            ZFS.hold(
                                                remote_dataset, snapshot,
                                                replicate_settings['endpoint'])
                                            ZFS.release(
                                                dataset, previous_snapshot)
                                            ZFS.release(
                                                remote_dataset,
                                                previous_snapshot,
                                                replicate_settings['endpoint'])
                                            previous_snapshot = snapshot
                                else:
                                    for snapshot in remote_snapshots[
                                            remote_dataset]:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a remote snapshot that is not yet on the local host.
                                            size = ZFS.get_size(
                                                remote_dataset,
                                                previous_snapshot, snapshot,
                                                replicate_settings['endpoint'])
                                            Manager.logger.info(
                                                '  {0}@{1} > {0}@{2} ({3})'.
                                                format(remote_dataset,
                                                       previous_snapshot,
                                                       snapshot, size))
                                            ZFS.replicate(
                                                remote_dataset,
                                                previous_snapshot,
                                                snapshot,
                                                dataset,
                                                replicate_settings.get(
                                                    'buffer_size',
                                                    BUFFER_SIZE),
                                                replicate_settings['endpoint'],
                                                direction='pull',
                                                compression=replicate_settings[
                                                    'compression'])
                                            ZFS.hold(dataset, snapshot)
                                            ZFS.hold(
                                                remote_dataset, snapshot,
                                                replicate_settings['endpoint'])
                                            ZFS.release(
                                                dataset, previous_snapshot)
                                            ZFS.release(
                                                remote_dataset,
                                                previous_snapshot,
                                                replicate_settings['endpoint'])
                                            previous_snapshot = snapshot
                            elif push is True and len(local_snapshots) > 0:
                                # No common snapshot
                                if remote_dataset not in remote_snapshots:
                                    # No remote snapshot, full replication
                                    snapshot = local_snapshots[-1]
                                    size = ZFS.get_size(
                                        dataset, None, snapshot)
                                    Manager.logger.info(
                                        '  {0}@         > {0}@{1} ({2})'.
                                        format(dataset, snapshot, size))
                                    ZFS.replicate(
                                        dataset,
                                        None,
                                        snapshot,
                                        remote_dataset,
                                        replicate_settings.get(
                                            'buffer_size', BUFFER_SIZE),
                                        replicate_settings['endpoint'],
                                        direction='push',
                                        compression=replicate_settings[
                                            'compression'])
                                    ZFS.hold(dataset, snapshot)
                                    ZFS.hold(remote_dataset, snapshot,
                                             replicate_settings['endpoint'])
                            elif push is False and remote_dataset in remote_snapshots and len(
                                    remote_snapshots[remote_dataset]) > 0:
                                # No common snapshot
                                if len(local_snapshots) == 0:
                                    # No local snapshot, full replication
                                    snapshot = remote_snapshots[
                                        remote_dataset][-1]
                                    size = ZFS.get_size(
                                        remote_dataset, None, snapshot,
                                        replicate_settings['endpoint'])
                                    Manager.logger.info(
                                        '  {0}@         > {0}@{1} ({2})'.
                                        format(remote_dataset, snapshot, size))
                                    ZFS.replicate(
                                        remote_dataset,
                                        None,
                                        snapshot,
                                        dataset,
                                        replicate_settings.get(
                                            'buffer_size', BUFFER_SIZE),
                                        replicate_settings['endpoint'],
                                        direction='pull',
                                        compression=replicate_settings[
                                            'compression'])
                                    ZFS.hold(dataset, snapshot)
                                    ZFS.hold(remote_dataset, snapshot,
                                             replicate_settings['endpoint'])
                            Manager.logger.info(
                                'Replicating {0} complete'.format(dataset))

                        # Post execution command
                        if dataset_settings['postexec'] is not None:
                            Helper.run_command(dataset_settings['postexec'],
                                               '/')

                    # Cleaning the snapshots (cleaning is mandatory)
                    if today in local_snapshots or yesterday in local_snapshots:
                        Cleaner.clean(dataset, local_snapshots,
                                      dataset_settings['schema'])

                except Exception as ex:
                    Manager.logger.error('Exception: {0}'.format(str(ex)))
Example 8
    def run(settings):
        """
        Executes a single run where certain datasets might or might not be snapshotted
        """

        now = datetime.now()
        today = "{0:04d}{1:02d}{2:02d}".format(now.year, now.month, now.day)

        snapshots = ZFS.get_snapshots()
        datasets = ZFS.get_datasets()
        for dataset in datasets:
            if dataset in settings:
                try:
                    dataset_settings = settings[dataset]
                    local_snapshots = snapshots.get(dataset, [])

                    take_snapshot = dataset_settings["snapshot"] is True
                    replicate = dataset_settings["replicate"] is not None

                    # Decide whether we need to handle this dataset
                    execute = False
                    if take_snapshot is True or replicate is True:
                        if dataset_settings["time"] == "trigger":
                            # We wait until we find a trigger file in the filesystem
                            trigger_filename = "{0}/.trigger".format(dataset_settings["mountpoint"])
                            if os.path.exists(trigger_filename):
                                Manager.logger.info("Trigger found on {0}".format(dataset))
                                os.remove(trigger_filename)
                                execute = True
                        else:
                            trigger_time = dataset_settings["time"].split(":")
                            hour = int(trigger_time[0])
                            minutes = int(trigger_time[1])
                            if (
                                now.hour > hour or (now.hour == hour and now.minute >= minutes)
                            ) and today not in local_snapshots:
                                Manager.logger.info("Time passed for {0}".format(dataset))
                                execute = True

                    if execute is True:
                        # Pre-execution command
                        if dataset_settings["preexec"] is not None:
                            Helper.run_command(dataset_settings["preexec"], "/")

                        if take_snapshot is True:
                            # Take today's snapshot
                            Manager.logger.info("Taking snapshot {0}@{1}".format(dataset, today))
                            ZFS.snapshot(dataset, today)
                            local_snapshots.append(today)
                            Manager.logger.info("Taking snapshot {0}@{1} complete".format(dataset, today))

                        # Replicating, if required
                        if replicate is True:
                            Manager.logger.info("Replicating {0}".format(dataset))
                            replicate_settings = dataset_settings["replicate"]
                            push = replicate_settings["target"] is not None
                            remote_dataset = replicate_settings["target"] if push else replicate_settings["source"]
                            remote_snapshots = ZFS.get_snapshots(remote_dataset, replicate_settings["endpoint"])
                            last_common_snapshot = None
                            if remote_dataset in remote_snapshots:
                                if (
                                    push is True
                                ):  # If pushing, we search for the last local snapshot that is remotely available
                                    for snapshot in local_snapshots:
                                        if snapshot in remote_snapshots[remote_dataset]:
                                            last_common_snapshot = snapshot
                                else:  # Else, we search for the last remote snapshot that is locally available
                                    for snapshot in remote_snapshots[remote_dataset]:
                                        if snapshot in local_snapshots:
                                            last_common_snapshot = snapshot
                            if last_common_snapshot is not None:  # There's a common snapshot
                                previous_snapshot = None
                                if push is True:
                                    for snapshot in local_snapshots:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a snapshot on this host that is not yet on the other side.
                                            size = ZFS.get_size(dataset, previous_snapshot, snapshot)
                                            Manager.logger.info(
                                                "  {0}@{1} > {0}@{2} ({3})".format(
                                                    dataset, previous_snapshot, snapshot, size
                                                )
                                            )
                                            ZFS.replicate(
                                                dataset,
                                                previous_snapshot,
                                                snapshot,
                                                remote_dataset,
                                                replicate_settings["endpoint"],
                                                direction="push",
                                                compression=replicate_settings["compression"],
                                            )
                                            previous_snapshot = snapshot
                                else:
                                    for snapshot in remote_snapshots[remote_dataset]:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a remote snapshot that is not yet on the local host.
                                            size = ZFS.get_size(
                                                remote_dataset,
                                                previous_snapshot,
                                                snapshot,
                                                replicate_settings["endpoint"],
                                            )
                                            Manager.logger.info(
                                                "  {0}@{1} > {0}@{2} ({3})".format(
                                                    remote_dataset, previous_snapshot, snapshot, size
                                                )
                                            )
                                            ZFS.replicate(
                                                remote_dataset,
                                                previous_snapshot,
                                                snapshot,
                                                dataset,
                                                replicate_settings["endpoint"],
                                                direction="pull",
                                                compression=replicate_settings["compression"],
                                            )
                                            previous_snapshot = snapshot
                            elif push is True and len(local_snapshots) > 0:
                                # No common snapshot
                                if remote_dataset not in remote_snapshots:
                                    # No remote snapshot, full replication
                                    snapshot = local_snapshots[-1]
                                    size = ZFS.get_size(dataset, None, snapshot)
                                    Manager.logger.info(
                                        "  {0}@         > {0}@{1} ({2})".format(dataset, snapshot, size)
                                    )
                                    ZFS.replicate(
                                        dataset,
                                        None,
                                        snapshot,
                                        remote_dataset,
                                        replicate_settings["endpoint"],
                                        direction="push",
                                        compression=replicate_settings["compression"],
                                    )
                            elif (
                                push is False
                                and remote_dataset in remote_snapshots
                                and len(remote_snapshots[remote_dataset]) > 0
                            ):
                                # No common snapshot
                                if len(local_snapshots) == 0:
                                    # No local snapshot, full replication
                                    snapshot = remote_snapshots[remote_dataset][-1]
                                    size = ZFS.get_size(remote_dataset, None, snapshot, replicate_settings["endpoint"])
                                    Manager.logger.info(
                                        "  {0}@         > {0}@{1} ({2})".format(remote_dataset, snapshot, size)
                                    )
                                    ZFS.replicate(
                                        remote_dataset,
                                        None,
                                        snapshot,
                                        dataset,
                                        replicate_settings["endpoint"],
                                        direction="pull",
                                        compression=replicate_settings["compression"],
                                    )
                            Manager.logger.info("Replicating {0} complete".format(dataset))

                        # Post execution command
                        if dataset_settings["postexec"] is not None:
                            Helper.run_command(dataset_settings["postexec"], "/")

                    # Cleaning the snapshots (cleaning is mandatory)
                    if today in local_snapshots:
                        Cleaner.clean(dataset, local_snapshots, dataset_settings["schema"])
                except Exception as ex:
                    Manager.logger.error("Exception: {0}".format(str(ex)))
Example 9
import os
import sys

congress_id = ""
if len(sys.argv) > 3 or len(sys.argv) < 2:
    print("Please Enter valid parameter:")
    print("Parameter: Congress term number")
    print("Option: --skip, avoid data cleaning")
    sys.exit()

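# With exactly one argument (the congress term), run the full clean-then-classify pipeline.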
if len(sys.argv) == 2:
    congress_id = str(sys.argv[1])
    if os.path.isfile("rawData/" + "speeches_" + congress_id +
                      ".txt") and os.path.isfile("rawData/" + congress_id +
                                                 "_SpeakerMap.txt"):
        print("cleaning ....")
        data_cleaner = Cleaner([congress_id])
        data_cleaner.clean_pipeline()
        print("classifying ....")
        congress_classifier = Classifier([congress_id])
        congress_classifier.base_pipeline()
        print("done.")
        sys.exit()
    else:
        print(
            "There are no speeches or SpeakerMap text files to process for congress "
            + congress_id)
        print(
            "Please put the raw text data for the target congress into the rawData directory"
        )
        sys.exit()
Example 10
    def run(settings):
        """
        Executes a single run where certain datasets might or might not be snapshotted
        """

        now = datetime.now()
        yda = datetime.now() - timedelta(1)
        today = '{0:04d}{1:02d}{2:02d}'.format(now.year, now.month, now.day)
        yesterday = '{0:04d}{1:02d}{2:02d}'.format(yda.year, yda.month, yda.day)

        snapshots = ZFS.get_snapshots()
        datasets = ZFS.get_datasets()
        for dataset in datasets:
            if dataset in settings:
                try:
                    dataset_settings = settings[dataset]
                    local_snapshots = snapshots.get(dataset, [])

                    take_snapshot = dataset_settings['snapshot'] is True
                    replicate = dataset_settings['replicate'] is not None

                    # Decide whether we need to handle this dataset
                    execute = False
                    if take_snapshot is True or replicate is True:
                        if dataset_settings['time'] == 'trigger':
                            # We wait until we find a trigger file in the filesystem
                            trigger_filename = '{0}/.trigger'.format(dataset_settings['mountpoint'])
                            if os.path.exists(trigger_filename):
                                Manager.logger.info('Trigger found on {0}'.format(dataset))
                                os.remove(trigger_filename)
                                execute = True
                        else:
                            trigger_time = dataset_settings['time'].split(':')
                            hour = int(trigger_time[0])
                            minutes = int(trigger_time[1])
                            if (now.hour > hour or (now.hour == hour and now.minute >= minutes)) and today not in local_snapshots:
                                Manager.logger.info('Time passed for {0}'.format(dataset))
                                execute = True

                    if execute is True:
                        # Pre-execution command
                        if dataset_settings['preexec'] is not None:
                            Helper.run_command(dataset_settings['preexec'], '/')

                        if take_snapshot is True:
                            # Take today's snapshot
                            Manager.logger.info('Taking snapshot {0}@{1}'.format(dataset, today))
                            ZFS.snapshot(dataset, today)
                            local_snapshots.append(today)
                            Manager.logger.info('Taking snapshot {0}@{1} complete'.format(dataset, today))

                        # Replicating, if required
                        if replicate is True:
                            Manager.logger.info('Replicating {0}'.format(dataset))
                            replicate_settings = dataset_settings['replicate']
                            push = replicate_settings['target'] is not None
                            remote_dataset = replicate_settings['target'] if push else replicate_settings['source']
                            remote_snapshots = ZFS.get_snapshots(remote_dataset, replicate_settings['endpoint'])
                            last_common_snapshot = None
                            if remote_dataset in remote_snapshots:
                                if push is True:  # If pushing, we search for the last local snapshot that is remotely available
                                    for snapshot in local_snapshots:
                                        if snapshot in remote_snapshots[remote_dataset]:
                                            last_common_snapshot = snapshot
                                else:  # Else, we search for the last remote snapshot that is locally available
                                    for snapshot in remote_snapshots[remote_dataset]:
                                        if snapshot in local_snapshots:
                                            last_common_snapshot = snapshot
                            if last_common_snapshot is not None:  # There's a common snapshot
                                previous_snapshot = None
                                if push is True:
                                    for snapshot in local_snapshots:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a snapshot on this host that is not yet on the other side.
                                            size = ZFS.get_size(dataset, previous_snapshot, snapshot)
                                            Manager.logger.info('  {0}@{1} > {0}@{2} ({3})'.format(dataset, previous_snapshot, snapshot, size))
                                            ZFS.replicate(dataset, previous_snapshot, snapshot, remote_dataset, replicate_settings.get('buffer_size', BUFFER_SIZE), replicate_settings['endpoint'], direction='push', compression=replicate_settings['compression'])
                                            ZFS.hold(dataset, snapshot)
                                            ZFS.hold(remote_dataset, snapshot, replicate_settings['endpoint'])
                                            ZFS.release(dataset, previous_snapshot)
                                            ZFS.release(remote_dataset, previous_snapshot, replicate_settings['endpoint'])
                                            previous_snapshot = snapshot
                                else:
                                    for snapshot in remote_snapshots[remote_dataset]:
                                        if snapshot == last_common_snapshot:
                                            previous_snapshot = last_common_snapshot
                                            continue
                                        if previous_snapshot is not None:
                                            # There is a remote snapshot that is not yet on the local host.
                                            size = ZFS.get_size(remote_dataset, previous_snapshot, snapshot, replicate_settings['endpoint'])
                                            Manager.logger.info('  {0}@{1} > {0}@{2} ({3})'.format(remote_dataset, previous_snapshot, snapshot, size))
                                            ZFS.replicate(remote_dataset, previous_snapshot, snapshot, dataset, replicate_settings.get('buffer_size', BUFFER_SIZE), replicate_settings['endpoint'], direction='pull', compression=replicate_settings['compression'])
                                            ZFS.hold(dataset, snapshot)
                                            ZFS.hold(remote_dataset, snapshot, replicate_settings['endpoint'])
                                            ZFS.release(dataset, previous_snapshot)
                                            ZFS.release(remote_dataset, previous_snapshot, replicate_settings['endpoint'])
                                            previous_snapshot = snapshot
                            elif push is True and len(local_snapshots) > 0:
                                # No common snapshot
                                if remote_dataset not in remote_snapshots:
                                    # No remote snapshot, full replication
                                    snapshot = local_snapshots[-1]
                                    size = ZFS.get_size(dataset, None, snapshot)
                                    Manager.logger.info('  {0}@         > {0}@{1} ({2})'.format(dataset, snapshot, size))
                                    ZFS.replicate(dataset, None, snapshot, remote_dataset, replicate_settings.get('buffer_size', BUFFER_SIZE), replicate_settings['endpoint'], direction='push', compression=replicate_settings['compression'])
                                    ZFS.hold(dataset, snapshot)
                                    ZFS.hold(remote_dataset, snapshot, replicate_settings['endpoint'])
                            elif push is False and remote_dataset in remote_snapshots and len(remote_snapshots[remote_dataset]) > 0:
                                # No common snapshot
                                if len(local_snapshots) == 0:
                                    # No local snapshot, full replication
                                    snapshot = remote_snapshots[remote_dataset][-1]
                                    size = ZFS.get_size(remote_dataset, None, snapshot, replicate_settings['endpoint'])
                                    Manager.logger.info('  {0}@         > {0}@{1} ({2})'.format(remote_dataset, snapshot, size))
                                    ZFS.replicate(remote_dataset, None, snapshot, dataset, replicate_settings.get('buffer_size', BUFFER_SIZE), replicate_settings['endpoint'], direction='pull', compression=replicate_settings['compression'])
                                    ZFS.hold(dataset, snapshot)
                                    ZFS.hold(remote_dataset, snapshot, replicate_settings['endpoint'])
                            Manager.logger.info('Replicating {0} complete'.format(dataset))

                        # Post execution command
                        if dataset_settings['postexec'] is not None:
                            Helper.run_command(dataset_settings['postexec'], '/')

                    # Cleaning the snapshots (cleaning is mandatory)
                    if today in local_snapshots or yesterday in local_snapshots:
                        Cleaner.clean(dataset, local_snapshots, dataset_settings['schema'])

                except Exception as ex:
                    Manager.logger.error('Exception: {0}'.format(str(ex)))
Example 11
import sys
from store import Storer
from clean import Cleaner
from csv_dumper import CsvDumper

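# Minimal pipeline: save the page's HTML locally, clean it, then dump the results to CSV.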
url_param = sys.argv[-1]

saved_page_loc = Storer.html_page(url_param)

Cleaner.start(saved_page_loc)

CsvDumper.start()
Example 12
File: model.py Project: pah8p/ML
plots = [
    [plot.fitted_histogram, y['LogSalePrice']],
    #	[plot.qq, y['SalePrice']],
    #	[plot.qq, y['Log1SalePrice']],
]
#plot.view(plots)

#y.to_csv('y.csv', index=False)
y_np = y.drop('SalePrice', axis=1).to_numpy()

train_id = x_train['Id']
test_id = x_test['Id']

x_train.drop('Id', axis=1, inplace=True)
x_test.drop('Id', axis=1, inplace=True)

cleaner = Cleaner(x_train, x_test)
cleaner.clean(variables)

#linear = regression.build('Linear')
#linear_cv = regression.cross_validate(linear, cleaner.x_train_np, y_np)
#print('LINEAR', linear_cv)

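# Cross-validate several regularized regressors on the cleaned feature matrix.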
lasso = regression.build('Lasso', alpha=0.002)
lasso_cv = regression.cross_validate(lasso, cleaner.x_train_np, y_np)

elastic_net = regression.build('ElasticNet', alpha=0.002)
elastic_net_cv = regression.cross_validate(elastic_net, cleaner.x_train_np,
                                           y_np)

kernel_ridge = regression.build('KernelRidge')
kernel_ridge_cv = regression.cross_validate(kernel_ridge, cleaner.x_train_np,