Example #1
    def setUp(self):
        # connection for mssql server
        source_db_config = get_config("SOURCE_DATABASE_CREDENTIALS")
        mssql_connection = pyodbc.connect(
            'Driver=' + source_db_config["driver"] + ';'
            'Server=' + source_db_config["server"] + ';'
            'Database=' + source_db_config["database"] + ';'
            'password=' + source_db_config["password"] + ';'
            # 'uid=' + source_db_config["username"] + ';'
        )
        self.mssql_cursor = mssql_connection.cursor()

        # connection for postgresql server
        target_db_config = get_config("TARGET_DATABASE_CREDENTIALS")
        psql_connection = psycopg2.connect(
            user=target_db_config["user"],
            password=target_db_config["password"],
            host=target_db_config["host"],
            port=target_db_config["port"],
            database=target_db_config["database"])
        self.psql_cursor = psql_connection.cursor()
        self.mssql_query = "select top %s * from filesystem"
        self.mssql_attributes = '10'
        self.mssql_data = []
        self.mssql_cursor.execute(self.mssql_query % self.mssql_attributes)
        columns = [column[0] for column in self.mssql_cursor.description]
        for row in self.mssql_cursor.fetchall():
            self.mssql_data.append(dict(zip(columns, row)))
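
A test method built on this fixture could then compare the migrated rows against the PostgreSQL target. The sketch below is hypothetical: it assumes the filesystem table carries an `id` column usable as a lookup key, which the snippet above does not show.

    def test_migrated_rows_exist_in_postgres(self):
        # Hypothetical check: every row fetched from MSSQL in setUp()
        # should also be present in the PostgreSQL target table.
        for source_row in self.mssql_data:
            self.psql_cursor.execute(
                "select * from filesystem where id = %s",
                (source_row["id"],))  # assumes an 'id' column exists
            self.assertIsNotNone(self.psql_cursor.fetchone())
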
Example #2
def snapshot_generic(
        crawlmode=Modes.INVM,
        urls=['stdout://'],
        snapshot_num=0,
        features=config_parser.get_config()['general']['features_to_crawl'],
        options={},
        format='csv',
        overwrite=False,
        namespace='',
        ignore_exceptions=True):

    crawler = features_crawler.FeaturesCrawler(crawl_mode=crawlmode)

    metadata = {
        'namespace': namespace,
        'features': ','.join(map(str, features)),
        'timestamp': int(time.time()),
        'system_type': 'vm',
        'compress': config_parser.get_config()['general']['compress'],
        'overwrite': overwrite,
    }

    output_urls = [
        ('{0}.{1}'.format(u, snapshot_num) if u.startswith('file:') else u)
        for u in urls
    ]

    host_crawl_plugins = plugins_manager.get_host_crawl_plugins(features)

    with Emitter(
            urls=output_urls,
            emitter_args=metadata,
            format=format,
    ) as emitter:
        for (plugin_obj, plugin_args) in host_crawl_plugins:
            try:
                if should_exit:
                    break
                for (key, val,
                     feature_type) in plugin_obj.crawl(**plugin_args):
                    emitter.emit(key, val, feature_type)
            except Exception as exc:
                logger.exception(exc)
                if not ignore_exceptions:
                    raise exc

        # TODO remove this call after we move all features to plugins
        _snapshot_single_frame(emitter=emitter,
                               features=features,
                               options=options,
                               crawler=crawler,
                               ignore_exceptions=ignore_exceptions)
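
Each host crawl plugin returned by `plugins_manager.get_host_crawl_plugins()` is expected to yield `(key, value, feature_type)` tuples from its `crawl()` method, which the loop above forwards to the emitter. A minimal, hypothetical plugin illustrating that contract (the class name and feature name are made up):

class DemoHostCrawler(object):
    # Hypothetical plugin sketch. get_feature() is what the feature-based
    # filter functions match against; crawl() yields the tuples that
    # snapshot_generic() emits.
    def get_feature(self):
        return 'demo'

    def crawl(self, **kwargs):
        yield ('demo-key', {'answer': 42}, 'demo')
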
Example #3
def get_filtered_list_of_containers(options={},
                                    host_namespace=misc.get_host_ipaddr()):
    """
    Returns a partition of all the Container objects currently running in the
    system and set the `namespace` and metadata of these containers.

    The partitioning is given by `partition_strategy`.
    """

    environment = options.get('environment',
                              get_config()['general']['environment'])
    container_opts = {
        'host_namespace': host_namespace,
        'environment': environment,
    }

    user_list = options.get('docker_containers_list', 'ALL')
    partition_strategy = options.get('partition_strategy', {})

    assert (partition_strategy['name'] == 'equally_by_pid')
    process_id = partition_strategy['args']['process_id']
    num_processes = partition_strategy['args']['num_processes']

    filtered_list = []
    containers_list = list_all_containers(user_list, container_opts)
    for _container in containers_list:
        """
        There are docker and non-docker containers in this list. An example of
        a non-docker container is a chromium-browser process.
        TODO(kollerr): the logic that defines whether a container is acceptable
        to a plugin or not should be in the plugin itself.
        """

        default_environment = get_config()['general']['environment']
        if (environment != default_environment
                and not _container.is_docker_container()):
            continue
        """
        The partition strategy is to split all the containers equally by
        process pid. We do it by hashing the long_id of the container.
        """

        _hash = _container.long_id
        num = int(_hash, 16) % int(num_processes)
        if num == process_id:
            filtered_list.append(_container)

    return filtered_list
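
The `equally_by_pid` strategy above assigns each container to exactly one crawler process by hashing its `long_id`. A standalone sketch of that arithmetic, using made-up hexadecimal IDs (real Docker long IDs are 64-character hex strings):

# Minimal illustration of the modulo-based partitioning used above.
long_ids = ['a3f1b2', '0b2c9d', 'ffe901', '1234ab']  # hypothetical IDs
num_processes = 2

for process_id in range(num_processes):
    assigned = [_id for _id in long_ids
                if int(_id, 16) % num_processes == process_id]
    print('process %d crawls %s' % (process_id, assigned))
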
Example #4
def snapshot_mesos(
    crawlmode=Modes.MESOS,
    urls=['stdout://'],
    snapshot_num=0,
    features=None,
    options={},
    format='csv',
    overwrite=False,
    namespace='',
    ignore_exceptions=True,
):
    compress = config_parser.get_config()['general']['compress']
    metadata = {
        'namespace': namespace,
        'timestamp': int(time.time()),
        'system_type': 'mesos',
        'compress': compress,
        'overwrite': overwrite,
    }

    output_urls = [
        ('{0}.{1}'.format(u, snapshot_num) if u.startswith('file:') else u)
        for u in urls
    ]

    with Emitter(
            urls=output_urls,
            emitter_args=metadata,
            format=format,
    ) as emitter:
        frame = snapshot_crawler_mesos_frame()
        emitter.emit('mesos', frame)
Example #5
def reload_vm_crawl_plugins(
        plugin_places=[misc.execution_path('plugins')],
        features=config_parser.get_config()['general']['features_to_crawl'],
        plugin_mode=config_parser.get_config()['general']['plugin_mode']):
    global vm_crawl_plugins

    if plugin_mode is False:  # aka override via --features CLI
        filter_func = lambda plugin_obj, plugin_name, enabled_plugins: (
            plugin_obj.get_feature() in features)
    else:
        filter_func = lambda plugin_obj, plugin_name, enabled_plugins: (
            plugin_name in enabled_plugins)

    vm_crawl_plugins = list(
        _load_plugins(plugin_places + ['plugins'],
                      category_filter={"crawler": IVMCrawler},
                      filter_func=filter_func))
Example #6
    def initialize_deep_sort(self):
        # Initialize the deep sort tracker object.
        cfg = get_config()
        cfg.merge_from_file(self.deep_sort_path)
        use_cuda = torch.cuda.is_available()
        if not use_cuda:
            print("Running in CPU mode, which may be very slow!")
        return build_tracker(cfg, use_cuda=use_cuda)
Example #7
def reload_container_crawl_plugins(
        plugin_places=[misc.execution_path('plugins')],
        features=config_parser.get_config()['general']['features_to_crawl'],
        plugin_mode=config_parser.get_config()['general']['plugin_mode']):
    global container_crawl_plugins

    # use --pluginmode to override plugins for legacy CLI-based invocation

    if plugin_mode is False:  # aka override via --features CLI
        filter_func = lambda plugin_obj, plugin_name, enabled_plugins: (
            plugin_obj.get_feature() in features)
    else:
        filter_func = lambda plugin_obj, plugin_name, enabled_plugins: (
            plugin_name in enabled_plugins)

    container_crawl_plugins = list(
        _load_plugins(plugin_places + ['plugins'],
                      category_filter={"crawler": IContainerCrawler},
                      filter_func=filter_func))
Example #8
def get_config():
    ret = config_parser.get_config()
    # Read the current db name from the config file.
    try:
        with open("dbconfig") as f:
            ret["dbname"] = f.readlines()[0].strip()
    except (IOError, IndexError):
        ret["dbname"] = "ad1"

    return ret
Example #9
def main():
    parent_conn, child_conn = Pipe()
    config = config_parser.get_config('./multi_chat.config')

    twitch = twich_irc_async_io.IRC_twicth(config, child_conn)
    twitch.start()

    GG = goodgame.GoodGame_websock(config, child_conn)
    GG.start()

    root = tk.Tk()
    chat_gui_tk.Chat_list(root, parent_conn).pack(fill="both", expand=True)
    root.mainloop()
Example #10
def snapshot_containers(
    containers,
    urls=['stdout://'],
    snapshot_num=0,
    features=config_parser.get_config()['general']['features_to_crawl'],
    options={},
    format='csv',
    overwrite=False,
    ignore_exceptions=True,
    host_namespace='',
):

    curr_containers = get_filtered_list_of_containers(options, host_namespace)
    deleted = [c for c in containers if c not in curr_containers]
    containers = curr_containers

    for container in deleted:
        if options.get('link_container_log_files', False):
            try:
                container.unlink_logfiles(options)
            except NotImplementedError:
                pass

    logger.debug('Crawling %d containers' % (len(containers)))

    for container in containers:

        logger.info('Crawling container %s %s %s' %
                    (container.pid, container.short_id, container.namespace))

        if options.get('link_container_log_files', False):
            # This is a NOP if files are already linked (which is
            # pretty much always).
            try:
                container.link_logfiles(options=options)
            except NotImplementedError:
                pass

        # no feature crawling
        if 'nofeatures' in features:
            continue
        snapshot_container(urls=urls,
                           snapshot_num=snapshot_num,
                           features=features,
                           options=options,
                           format=format,
                           container=container,
                           overwrite=overwrite)
    return containers
Example #11
def load_crawl_plugins(category_filter={},
                       features=['os', 'cpu'],
                       plugin_places=['plugins'],
                       options={}):

    crawl_plugins = get_plugins(category_filter, plugin_places)
    config = config_parser.get_config()

    enabled_plugins = []
    if 'enabled_plugins' in config['general']:
        enabled_plugins = config['general']['enabled_plugins']
        if 'ALL' in enabled_plugins:
            enabled_plugins = [p for p in config['crawlers']]
            # Reading from 'crawlers' section inside crawler.conf
            # Alternatively, 'ALL' can be made to signify
            # all crawlers in plugins/*

    for plugin in crawl_plugins:
        if ((plugin.name in enabled_plugins)
                or (plugin.plugin_object.get_feature() in features)):
            plugin_args = get_plugin_args(plugin, config, options)
            yield (plugin.plugin_object, plugin_args)
Example #12
def _load_plugins(plugin_places=[misc.execution_path('plugins')],
                  category_filter={},
                  filter_func=lambda *arg: True):
    pm = PluginManager(plugin_info_ext='plugin')

    # Normalize the paths to the location of this file.
    # XXX-ricarkol: there has to be a better way to do this.
    plugin_places = [misc.execution_path(x) for x in plugin_places]

    pm.setPluginPlaces(plugin_places)
    pm.setCategoriesFilter(category_filter)
    pm.collectPlugins()

    config = config_parser.get_config()
    enabled_plugins = [p for p in config['crawlers']]

    for plugin in pm.getAllPlugins():
        if filter_func(plugin.plugin_object, plugin.name, enabled_plugins):
            plugin_args = {}
            if plugin.name in config['crawlers']:
                plugin_args = config['crawlers'][plugin.name]
            yield (plugin.plugin_object, plugin_args)
Example #13
    def __init__(self):
        cfg = get_config()
        cfg.merge_from_file(opt.deep_sort_path)
        use_cuda = torch.cuda.is_available()
        if not use_cuda:
            print("Using CPU")

        desired_classes = [
            'person', 'bicycle', 'car', 'motorbike', 'bus', 'truck'
        ]
        class_names, desired_class_names = read_class_names(
            opt.label_names_path, desired_classes=desired_classes)
        self.class_names = class_names

        self.yolo = Load_Yolo_Model(track_only=desired_class_names,
                                    conf_thres=opt.confidence,
                                    weights=opt.yolo_path)
        self.deepsort = build_tracker(cfg, use_cuda=use_cuda)
        self.log = logging.getLogger()
        self.log.setLevel(logging.INFO)
        self.log.addHandler(TqdmLoggingHandler())
        print("Initialized!")
Example #14
def _snapshot_single_frame(
    emitter,
    features=config_parser.get_config()['general']['features_to_crawl'],
    options={},
    crawler=None,
    ignore_exceptions=True,
):

    global should_exit

    for feature in features:
        feature_options = options.get(feature, {})
        if should_exit:
            break
        if feature_options is None:
            continue
        try:
            for (key, val) in crawler.funcdict[feature](**feature_options):
                emitter.emit(key, val, feature)
        except Exception as exc:
            logger.exception(exc)
            if not ignore_exceptions:
                raise exc
Example #15
def load_emitter_plugins(urls=['stdout://'],
                         format='csv',
                         plugin_places=['plugins']):
    category_filter = {"emitter": IEmitter}

    # getting all emitter plugins from crawler/plugins/emitters/*
    all_emitter_plugins = get_plugins(category_filter, plugin_places)

    # getting enabled emitter plugins from the crawler.conf file
    conf_enabled_plugins = []
    config = config_parser.get_config()
    if 'enabled_emitter_plugins' in config['general']:
        conf_enabled_plugins = config['general']['enabled_emitter_plugins']
        if 'ALL' in conf_enabled_plugins:
            conf_enabled_plugins = [p for p in config['emitters']]

    for plugin in all_emitter_plugins:
        plugin_obj = plugin.plugin_object
        found_plugin = False
        # iterate over CLI provided emitters
        for url in urls:
            parsed = urlparse.urlparse(url)
            proto = parsed.scheme
            if plugin_obj.get_emitter_protocol() == proto:
                plugin_args = get_emitter_plugin_args(plugin, config)
                plugin_obj.init(url, emit_format=format)
                yield (plugin_obj, plugin_args)
                found_plugin = True
        if found_plugin is True:
            continue
        # iterate over conf provided emitters
        if plugin.name in conf_enabled_plugins:
            plugin_args = get_emitter_plugin_args(plugin, config)
            plugin_obj.init(url=plugin_args.get('url', 'missing_url'),
                            emit_format=plugin_args.get(
                                'format', 'missing_format'))
            yield (plugin_obj, plugin_args)
Example #16
def _load_plugins(
        category_filter={},
        filter_func=lambda *arg: True,
        features=['os', 'cpu'],
        plugin_places=['plugins'],
        options={}):

    pm = PluginManager(plugin_info_ext='plugin')

    # Normalize the paths to the location of this file.
    # XXX-ricarkol: there has to be a better way to do this.
    plugin_places = [misc.execution_path(x) for x in plugin_places]

    pm.setPluginPlaces(plugin_places)
    pm.setCategoriesFilter(category_filter)
    pm.collectPlugins()

    config = config_parser.get_config()

    enabled_plugins = []
    if 'enabled_plugins' in config['general']:
        enabled_plugins = config['general']['enabled_plugins']
        if 'ALL' in enabled_plugins:
            enabled_plugins = [p for p in config['crawlers']]
            # Reading from 'crawlers' section inside crawler.conf
            # Alternatively, 'ALL' can be made to signify
            # all crawlers in plugins/*

    for plugin in pm.getAllPlugins():
        if filter_func(
                plugin.plugin_object,
                plugin.name,
                enabled_plugins,
                features):
            plugin_args = get_plugin_args(plugin, config, options)
            yield (plugin.plugin_object, plugin_args)
Example #17
def snapshot_container(
    urls=['stdout://'],
    snapshot_num=0,
    features=config_parser.get_config()['general']['features_to_crawl'],
    options={},
    format='csv',
    overwrite=False,
    container=None,
    ignore_exceptions=True,
):
    global should_exit

    if not container:
        raise ValueError('snapshot_container can only be called with a '
                         'container object already initialized.')

    crawler = features_crawler.FeaturesCrawler(crawl_mode=Modes.OUTCONTAINER,
                                               container=container)

    compress = config_parser.get_config()['general']['compress']
    metadata = options.get('metadata', {})
    extra_metadata = metadata.get('extra_metadata', {})
    extra_metadata_for_all = metadata.get('extra_metadata_for_all', False)

    metadata = {
        'namespace': container.namespace,
        'system_type': 'container',
        'features': ','.join(map(str, features)),
        'timestamp': int(time.time()),
        'compress': compress,
        'container_long_id': container.long_id,
        'container_name': container.name,
        'container_image': container.image,
        'extra': extra_metadata,
        'extra_all_features': extra_metadata_for_all,
        'uuid': str(uuid.uuid4())
    }

    if container.is_docker_container():
        metadata['owner_namespace'] = container.owner_namespace
        metadata['docker_image_long_name'] = container.docker_image_long_name
        metadata['docker_image_short_name'] = container.docker_image_short_name
        metadata['docker_image_tag'] = container.docker_image_tag
        metadata['docker_image_registry'] = container.docker_image_registry

    output_urls = reformat_output_urls(urls, container.short_id, snapshot_num,
                                       overwrite)

    container_crawl_plugins = plugins_manager.get_container_crawl_plugins(
        features=features)
    plugin_mode = config_parser.get_config()['general']['plugin_mode']

    with Emitter(
            urls=output_urls,
            emitter_args=metadata,
            format=format,
    ) as emitter:
        for (plugin_obj, plugin_args) in container_crawl_plugins:
            try:
                if should_exit:
                    break
                for (key, val,
                     typ) in plugin_obj.crawl(container_id=container.long_id,
                                              **plugin_args):
                    emitter.emit(key, val, typ)
            except Exception as exc:
                logger.exception(exc)
                if not ignore_exceptions:
                    raise exc

        # TODO remove this call after we move all features to plugins
        if plugin_mode is False:
            _snapshot_single_frame(emitter=emitter,
                                   features=features,
                                   options=options,
                                   crawler=crawler,
                                   ignore_exceptions=ignore_exceptions)
Example #18
parser.add_argument('-s',
                    '--save',
                    dest='save',
                    action='store_true',
                    help='flag for predictions saving')
parser.add_argument('-t',
                    '--tag',
                    dest='tag',
                    help='experiment tag, added to logs name')
parser.add_argument('--test',
                    dest='is_test',
                    action='store_true',
                    help='indicate test run')
args = parser.parse_args()

cfg = get_config(args)
model = get_model(cfg)

# Read data
data_path = path.join(
    DATA_PATH,
    'KiDS/DR4/{train_data}.fits'.format(train_data=cfg['train_data']))
data = process_kids(data_path,
                    columns=COLUMNS_KIDS_ALL + COLUMNS_SDSS,
                    bands=cfg['bands'],
                    cut=cfg['cut'],
                    sdss_cleaning=True)

# Get X and y
X = data[cfg['features']].values
y = data['CLASS'].values
Example #19
def main():

    euid = os.geteuid()
    if euid != 0:
        print 'Need to run this as root.'
        exit(1)

    get_config()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--options',
        dest='options',
        type=str,
        default=None,
        help='JSON dict of crawler options (see README for defaults)')
    parser.add_argument(
        '--url',
        dest='url',
        type=str,
        nargs='+',
        default=None,
        help='Send the snapshot data to URL. Defaults to file://frame',
    )
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=None,
        help='Data source this crawler is associated with. Defaults to '
             '/localhost',
    )
    parser.add_argument(
        '--features',
        dest='features',
        type=csv_list,
        default=get_config()['general']['features_to_crawl'],
        help='Comma-separated list of feature-types to crawl. Defaults to '
             '{0}'.format(get_config()['general']['features_to_crawl']))
    parser.add_argument(
        '--since',
        dest='since',
        type=str,
        choices=[
            'EPOCH',
            'BOOT',
            'LASTSNAPSHOT'],
        default=None,
        help='Only crawl features touched since {EPOCH,BOOT,LASTSNAPSHOT}. '
             'Defaults to BOOT',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=None,
        help='Target time period for iterations. Defaults to -1 which '
             'means only run one iteration.')
    parser.add_argument(
        '--compress',
        dest='compress',
        type=str,
        choices=[
            'true',
            'false'],
        default='true' if get_config()['general']['compress'] else 'false',
        help='Whether to GZIP-compress the output frame data, must be one of '
             '{true,false}. Defaults to true',
    )
    parser.add_argument('--logfile', dest='logfile', type=str,
                        default='crawler.log',
                        help='Logfile path. Defaults to crawler.log')
    parser.add_argument(
        '--crawlmode',
        dest='crawlmode',
        type=str,
        choices=[
            Modes.INVM,
            Modes.OUTVM,
            Modes.MOUNTPOINT,
            Modes.OUTCONTAINER,
            Modes.MESOS,
        ],
        default=Modes.INVM,
        help='The crawler mode: '
             '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,MESOS}. '
             'Defaults to INVM',
    )
    parser.add_argument(
        '--mountpoint',
        dest='mountpoint',
        type=str,
        default=get_config()['general']['default_mountpoint'],
        help='Mountpoint location (required for --crawlmode MOUNTPOINT)')
    parser.add_argument(
        '--inputfile',
        dest='inputfile',
        type=str,
        default=None,
        help='Path to file that contains frame data (required for '
             '--crawlmode FILE)')
    parser.add_argument(
        '--format',
        dest='format',
        type=str,
        default='csv',
        choices=['csv', 'graphite', 'json'],
        help='Emitted data format.',
    )
    parser.add_argument(
        '--crawlContainers',
        dest='crawlContainers',
        type=str,
        nargs='?',
        default='ALL',
        help='List of containers to crawl as a list of Docker container IDs. '
             'If this is not passed, then just the host is crawled. '
             'Alternatively the word "ALL" can be used to crawl every '
             'container. "ALL" will crawl all namespaces including the host '
             'itself. This option is only valid for INVM crawl mode. Example: '
             '--crawlContainers 5f3380d2319e,681be3e32661',
    )
    parser.add_argument(
        '--crawlVMs',
        dest='crawl_vm',
        nargs='+',
        default=None,
        help='List of VMs to crawl. Defaults to \'ALL\' VMs. '
             'Currently needs the following as input for each VM: '
             '\'vm_name, kernel_version_long, linux_flavour, arch\'. '
             'Auto kernel version detection is planned; then only VM names '
             '(\'ALL\' by default) would need to be passed. '
             'Example: --crawlVMs '
             'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64 '
             'vm2,4.0.3.x86_64,vanilla,x86_64',
    )
    parser.add_argument(
        '--environment',
        dest='environment',
        type=str,
        default=get_config()['general']['environment'],
        help='This specifies some environment-specific behavior, like how '
             'to name a container. The way to add a new behavior is by '
             'implementing a plugin (see plugins/cloudsight_environment.py '
             'as an example). Defaults to "cloudsight".',
    )
    parser.add_argument(
        '--pluginmode',
        dest='pluginmode',
        action='store_true',
        default=get_config()['general']['plugin_mode'],
        help='If --pluginmode is given, then only the enabled plugins in '
             'config/*.conf are loaded; '
             'otherwise legacy mode is run via the CLI.'
    )
    parser.add_argument(
        '--plugins',
        dest='plugin_places',
        type=csv_list,
        default=get_config()['general']['plugin_places'],
        help='This is a comma-separated list of directories where to find '
             'plugins. Each path can be absolute, or relative to the '
             'location of crawler.py.',
    )
    parser.add_argument(
        '--numprocesses',
        dest='numprocesses',
        type=int,
        default=None,
        help='Number of processes used for container crawling. Defaults '
             'to the number of cores.')
    parser.add_argument(
        '--extraMetadataFile',
        dest='extraMetadataFile',
        type=str,
        default=None,
        help='JSON file with data used to annotate all features. It can be '
             'used to append a set of system identifiers to the metadata '
             'feature, and to all features if --extraMetadataForAll is set.')

    parser.add_argument(
        '--extraMetadataForAll',
        dest='extraMetadataForAll',
        action='store_true',
        default=False,
        help='If specified all features are appended with extra metadata.')
    parser.add_argument(
        '--linkContainerLogFiles',
        dest='linkContainerLogFiles',
        action='store_true',
        default=get_config()['general']['link_container_log_files'],
        help='Experimental feature. If specified and if running in '
             'OUTCONTAINER mode, then the crawler maintains links to '
             'container log files.')
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        action='store_true',
        default=False,
        help='Overwrite the file-type URL parameter and strip the trailing '
             'sequence number.')
    parser.add_argument(
        '--avoidSetns',
        dest='avoid_setns',
        action='store_true',
        default=False,
        help='Avoids the use of the setns() syscall to crawl containers. '
             'Some features like process will not work with this option. '
             'Only applies to the OUTCONTAINER mode'
    )

    args = parser.parse_args()
    params = {}

    params['options'] = {}
    if args.options:
        try:
            _options = json.loads(args.options)
        except (KeyError, ValueError):
            sys.stderr.write('Can not parse the user options json.\n')
            sys.exit(1)
        for (option, value) in _options.iteritems():
            params['options'][option] = value

    options = params['options']

    if args.url:
        params['urls'] = args.url
    if args.namespace:
        params['namespace'] = args.namespace
    if args.features:
        params['features'] = args.features
    if args.since:
        params['since'] = args.since
    if args.frequency is not None:
        params['frequency'] = args.frequency
    options['compress'] = (args.compress in ['true', 'True'])
    params['overwrite'] = args.overwrite
    if args.crawlmode:
        params['crawlmode'] = args.crawlmode

        if args.crawlmode == 'MOUNTPOINT':
            if not args.mountpoint:
                print ('Need to specify mountpoint location (--mountpoint) '
                       'for MOUNTPOINT mode')
                sys.exit(1)
            if os.path.exists(args.mountpoint):
                options['root_dir'] = args.mountpoint
                options['os'] = {}
                options['os']['root_dir'] = args.mountpoint
                options['package'] = {}
                options['package']['root_dir'] = args.mountpoint
                options['file'] = {}
                options['file']['root_dir'] = args.mountpoint
                # To remove args.mountpoint (e.g. /mnt/CrawlDisk) from each
                # reported file path.
                options['file']['root_dir_alias'] = '/'
                options['config'] = {}
                options['config']['root_dir'] = args.mountpoint
                # To remove args.mountpoint (e.g. /mnt/CrawlDisk) from each
                # reported file path.
                options['config']['root_dir_alias'] = '/'

        if args.crawlmode == 'OUTCONTAINER':
            if args.crawlContainers:
                options['docker_containers_list'] = args.crawlContainers
            if not args.numprocesses:
                args.numprocesses = multiprocessing.cpu_count()
            # if args.avoid_setns:
            #    options['os']['avoid_setns'] = args.avoid_setns
            #    options['config']['avoid_setns'] = args.avoid_setns
            #    options['file']['avoid_setns'] = args.avoid_setns
            #    options['package']['avoid_setns'] = args.avoid_setns

        options['avoid_setns'] = args.avoid_setns

        if args.crawlmode == 'OUTVM':
            if args.crawl_vm:
                options['vm_list'] = args.crawl_vm

    if args.format:
        params['format'] = args.format
    if args.environment:
        options['environment'] = args.environment
    options['pluginmode'] = args.pluginmode
    if args.plugin_places:
        options['plugin_places'] = args.plugin_places
    if args.extraMetadataFile:
        options['metadata'] = {}
        metadata = options['metadata']
        metadata['extra_metadata_for_all'] = args.extraMetadataForAll
        try:
            with open(args.extraMetadataFile, 'r') as fp:
                metadata['extra_metadata'] = fp.read()
        except Exception as e:
            print 'Could not read the feature metadata json file: %s' \
                % e
            sys.exit(1)
    options['link_container_log_files'] = args.linkContainerLogFiles

    apply_user_args(options=options)

    start_autonomous_crawler(args.numprocesses, args.logfile, params, options)
Example #20
"""##  5. Test"""
from config_parser import get_config
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.decomposition import PCA
import random
from actor import Actor
from tqdm import tqdm
from data_generator import DataGenerator
from solution_checker import SolutionChecker
config, _, dir_ = get_config()

config.is_training = False
config.temperature = 1.2

tf.reset_default_graph()
actor = Actor(config) # Build graph

variables_to_save = [v for v in tf.global_variables() if 'Adam' not in v.name] # Save & restore all the variables.
saver = tf.train.Saver(var_list=variables_to_save, keep_checkpoint_every_n_hours=1.0)

with tf.Session() as sess:  # start session
    sess.run(tf.global_variables_initializer()) # Run initialize op
    
    save_path = "save/"+dir_
    saver.restore(sess, save_path+"/actor.ckpt") # Restore variables from disk.
    
    predictions_length, predictions_length_w2opt = [], []
    for i in tqdm(range(10)): # test instance
Example #21
def snapshot(
        urls=['stdout://'],
        namespace=misc.get_host_ipaddr(),
        features=config_parser.get_config()['general']['features_to_crawl'],
        options={},
        frequency=-1,
        crawlmode=Modes.INVM,
        format='csv',
        overwrite=False,
        first_snapshot_num=0,
        max_snapshots=-1):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It just
    loops sleeping for `frequency` seconds at each crawl interval.  During each
    interval, it collects the features listed in `features`, and sends them to
    the outputs listed in `urls`.

    :param urls: The url used as the output of the snapshot.
    :param namespace: This is a pointer to a specific system (e.g. IP for INVM).
    :param features: List of features to crawl.
    :param options: Tree of options with details such as which config files to use.
    :param frequency: Target time period for iterations. -1 means just one run.
    :param crawlmode: What's the system we want to crawl.
    :param format: The format of the frame, defaults to csv.
    """

    global should_exit
    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    environment = options.get(
        'environment',
        config_parser.get_config()['general']['environment'])
    plugin_places = options.get(
        'plugin_places',
        config_parser.get_config()['general']['plugin_places'])

    plugin_mode = config_parser.get_config()['general']['plugin_mode']

    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    plugins_manager.reload_container_crawl_plugins(plugin_places=plugin_places,
                                                   features=features,
                                                   plugin_mode=plugin_mode)

    plugins_manager.reload_vm_crawl_plugins(plugin_places=plugin_places,
                                            features=features,
                                            plugin_mode=plugin_mode)

    plugins_manager.reload_host_crawl_plugins(plugin_places=plugin_places,
                                              features=features,
                                              plugin_mode=plugin_mode)

    next_iteration_time = None

    snapshot_num = first_snapshot_num

    # Die if the parent dies
    PR_SET_PDEATHSIG = 1
    try:
        libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
        signal.signal(signal.SIGHUP, signal_handler_exit)
    except AttributeError:
        logger.warning('prctl is not available. MacOS is not supported.')

    containers = []

    # This is the main loop of the system, taking a snapshot and sleeping at
    # every iteration.

    while True:

        snapshot_time = int(time.time())

        if crawlmode == Modes.OUTCONTAINER:
            containers = snapshot_containers(
                containers=containers,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                host_namespace=namespace,
            )
        elif crawlmode == Modes.MESOS:
            snapshot_mesos(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode == Modes.OUTVM:
            snapshot_vms(
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode in [Modes.INVM, Modes.MOUNTPOINT]:
            snapshot_generic(crawlmode=crawlmode,
                             urls=urls,
                             snapshot_num=snapshot_num,
                             features=features,
                             options=options,
                             format=format,
                             namespace=namespace,
                             overwrite=overwrite)
        else:
            raise NotImplementedError('Crawl mode %s is not implemented' %
                                      crawlmode)

        # Frequency < 0 means only one run.
        if (frequency < 0 or should_exit or snapshot_num == max_snapshots):
            logger.info('Bye')
            break

        time_to_sleep, next_iteration_time = _get_next_iteration_time(
            next_iteration_time, frequency, snapshot_time)
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1
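
For reference, a one-shot in-VM snapshot written to a local frame file could be requested roughly as follows. This is a minimal sketch; the feature list and output URL are illustrative rather than values taken from crawler.conf.

# Hypothetical single-iteration invocation (frequency=-1 means one run).
snapshot(urls=['file://tmp/frame'],
         namespace='my-host',
         features=['os', 'cpu'],
         frequency=-1,
         crawlmode=Modes.INVM,
         format='csv')
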
Example #22
    return videos

def play_with_res(video, res, config, f):
    print("Launching browser...")
    d = get_driver(config['driver'])
    y = YouTube(video, res, d, config['youtube'], f)
    y.play()
    time.sleep(2)

def play_one_video_all_resolutions(config, video):
    f = FFInteractor(config['flowfetch'])
    resolutions = get_playable_resolutions(config, video['url'])
    print("Playing %s in" % (video['title']), resolutions)
    for res in resolutions:
        play_with_res(video, res, config, f)

if __name__ == '__main__':
    ###########testing##########
    # config = get_config()
    # f = FFInteractor(config['flowfetch'])
    # play_with_res("https://www.google.com=1","360p",config,f)
    # import sys
    # sys.exit()
    ############################
    time.sleep(5)
    config = get_config()
    videos = get_video_list_json(config)
    n = min(len(videos), config['no_of_videos'])
    s = min(len(videos), config['starting_index'])
    for video in videos[s:n]:
        play_one_video_all_resolutions(config, video)
Example #23
from config_parser import get_config
from env_config import DATA_PATH
from utils import logger, save_catalog
from data import COLUMNS_KIDS_ALL, COLUMNS_SDSS, process_kids
from models import get_model, build_outputs, get_single_problem_predictions, build_ann_validation_data, \
    build_xgb_validation_data

parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', dest='config_file', required=True, help='config file name')
parser.add_argument('-r', '--read', dest='read', action='store_true', help='flag to read weights instead of training')
parser.add_argument('-s', '--save', dest='save', action='store_true', help='flag for catalog saving')
parser.add_argument('-t', '--tag', dest='tag', help='catalog tag, added to logs name')
parser.add_argument('--test', dest='is_test', action='store_true', help='indicate test run')
args = parser.parse_args()

cfg = get_config(args, is_inference=True)

# Paths to read weights if args.read is set
if cfg['pred_class']:
    weights_path = 'outputs/inf_models/KiDS_DR4_x_SDSS_DR14_ann_clf_f-all__2020-06-08_15:22:15.hdf5'
else:
    weights_path = 'outputs/inf_models/KiDS_DR4_x_SDSS_DR14_ann_z_f-all_spec-qso__2020-06-08_16:22:38.hdf5'

# Limit rows to read from data in case of a test run
n_rows = 4000 if cfg['is_test'] else None

# Define data paths
data_path_train = path.join(DATA_PATH, 'KiDS/DR4/{train_data}.fits'.format(train_data=cfg['train_data']))
data_path_pred = path.join(DATA_PATH, 'KiDS/DR4/{inference_data}.fits'.format(inference_data=cfg['inference_data']))

# Read train data
Example #24
def snapshot_vms(
        urls=['stdout://'],
        snapshot_num=0,
        features=config_parser.get_config()['general']['features_to_crawl'],
        options={},
        format='csv',
        overwrite=False,
        namespace='',
        ignore_exceptions=True):

    # Default will become ALL from None, when auto kernel detection
    # gets merged
    vm_list = options.get('vm_list', None)

    if vm_list is None:
        raise ValueError('need list of VMs (with descriptors) to crawl!')
        # When None gets changed to ALL, this will not be raised

    # convert VM descriptor for each VM to
    # (vm_name, qemu_pid, kernel_version_long, distro, arch)
    # from input type: 'vm_name, kernel_version_long, distro, arch'
    vm_list = sanitize_vm_list(vm_list)

    for vm in vm_list:
        vm_name = vm[0]
        vm = vm[1:]

        crawler = features_crawler.FeaturesCrawler(crawl_mode=Modes.OUTVM,
                                                   vm=vm)

        metadata = {
            'namespace': namespace,
            'features': ','.join(map(str, features)),
            'timestamp': int(time.time()),
            'system_type': 'vm',
            'compress': config_parser.get_config()['general']['compress'],
            'overwrite': overwrite,
        }

        output_urls = reformat_output_urls(urls, vm_name, snapshot_num,
                                           overwrite)

        vm_crawl_plugins = plugins_manager.get_vm_crawl_plugins(features)
        plugin_mode = config_parser.get_config()['general']['plugin_mode']

        with Emitter(
                urls=output_urls,
                emitter_args=metadata,
                format=format,
        ) as emitter:
            for (plugin_obj, plugin_args) in vm_crawl_plugins:
                try:
                    if should_exit:
                        break
                    for (key, val,
                         feature_type) in plugin_obj.crawl(vm_desc=vm,
                                                           **plugin_args):
                        emitter.emit(key, val, feature_type)
                except Exception as exc:
                    logger.exception(exc)
                    if not ignore_exceptions:
                        raise exc

            # TODO remove this call after we move all features to plugins
            if plugin_mode is False:
                _snapshot_single_frame(emitter=emitter,
                                       features=features,
                                       options=options,
                                       crawler=crawler,
                                       ignore_exceptions=ignore_exceptions)
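
The comments above describe `sanitize_vm_list()` as converting each 'vm_name, kernel_version_long, distro, arch' descriptor into a (vm_name, qemu_pid, kernel_version_long, distro, arch) tuple. A simplified, hypothetical sketch of that parsing step (the QEMU PID lookup is stubbed out, since how the real crawler resolves it is not shown in these examples):

def parse_vm_descriptor(descriptor, qemu_pid=None):
    # Hypothetical helper: split the comma-separated descriptor into the
    # tuple shape described in the snapshot_vms() comments above.
    vm_name, kernel_long, distro, arch = [
        part.strip() for part in descriptor.split(',')]
    return (vm_name, qemu_pid, kernel_long, distro, arch)

# Example descriptor taken from the --crawlVMs help text above.
print(parse_vm_descriptor(
    'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64'))
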
Example #25
from flask import Flask, request, Response, make_response, url_for, render_template

import plivo

from config_parser import get_config

from helper import rent_number

from model import create_forward_entry, get_details_from, get_mobile

configs = get_config()

PLIVO_API = plivo.RestAPI(configs["AUTH_ID"], configs["AUTH_TOKEN"])

CALLER_ID = "19512977322"
BASE_URL = "http://ancient-taiga-3101.herokuapp.com"
app = Flask(__name__)

app.debug = True


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/save')
def save():
    sip = request.args.get('sip', '')
Example #26
import plivo
from config_parser import get_config

config = get_config()


def call(number, message):
    auth_id = config['AUTH_ID']
    auth_token = config['AUTH_TOKEN']
    p = plivo.RestAPI(auth_id, auth_token)

    params = {
        'from': '1212121212',
        'to': number,
        'answer_url': config['BASE_URL'] + '/answer?message=' + message,
        'answer_method': "GET",
    }
    response = p.make_call(params)
    return response


def add_message(message):
    plivo_response = plivo.Response()
    plivo_response.addSpeak(message, loop=1)
    return plivo_response.to_xml()