Example #1
  def __init__(self, dataset_cfg, feature_params, collect_wavs=False, verbose=False):

    # parent init
    super().__init__(dataset_cfg, feature_params, collect_wavs=collect_wavs, verbose=verbose)

    # feature extractor
    self.feature_extractor = FeatureExtractor(feature_params=self.feature_params)

    # short vars
    self.N = self.feature_extractor.N
    self.hop = self.feature_extractor.hop

    # create plot paths if they do not already exist
    create_folder(list(self.plot_paths.values()))

    # recreate
    if self.dataset_cfg['recreate'] or not check_folders_existance(self.wav_folders, empty_check=True):

      # delete old data
      delete_files_in_path(self.wav_folders, file_ext=self.dataset_cfg['file_ext'])

      # create wav folders
      create_folder(self.wav_folders)

      # create sets (specific to dataset)
      self.create_sets()

    # get audio files from sets
    self.get_audiofiles()
    self.get_annotation_files()
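
A note on the helper itself: these snippets import create_folder from a project-local common/helper module rather than a published library, and the single-argument call sites pass either one path string or a list of paths (a few projects use their own variants with different signatures). As a rough, hypothetical sketch only, not any of these projects' actual implementation, a helper compatible with the single-argument call style could look like this:

import os


def create_folder(paths):
    """Create one or more folders if they do not already exist.

    Hypothetical sketch: accepts a single path (str or os.PathLike)
    or an iterable of paths, matching how the examples call it.
    """
    # normalize a single path into a list so both call styles work
    if isinstance(paths, (str, os.PathLike)):
        paths = [paths]

    # create each folder, ignoring ones that already exist
    for path in paths:
        os.makedirs(path, exist_ok=True)

With this sketch, create_folder('./plots') and create_folder(['./plots', './models']) would both create the missing folders and silently skip ones that already exist.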
Example #2
    def __init__(self,
                 screen,
                 screen_size,
                 fps,
                 capture_path='./ignore/capture/',
                 frame_path='frames/',
                 frame_name='frame',
                 enabled=True):

        # params
        self.screen = screen
        self.screen_size = screen_size
        self.fps = fps

        # paths
        self.capture_path = capture_path
        self.frame_path = frame_path
        self.frame_name = frame_name

        # enabled
        self.enabled = enabled

        # delete old data
        delete_png_in_path(self.capture_path + self.frame_path)

        # create folder for captured frames
        create_folder([self.capture_path + self.frame_path])

        # vars
        self.actual_frame_num = 0
        self.frame_container = []

        # downsample of fps
        self.downsample = 2
        self.downsample_count = 0
Example #3
def test_move_folder():
    setup_test_files()

    # create a folder in the tmp dir, then move it into the working dir
    create_folder(f'{test_folder_path}/FolderToBeMoved')
    move_folder(f'{test_folder_path}/FolderToBeMoved', f'{test_folder_path}/working/FolderToBeMoved')
    # set up folders list
    tmp_folders = []
    working_folders = []
    # find all folders in the tmp dir and append them to the folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/') if
                   os.path.isdir(os.path.join(f'{test_folder_path}/', objects))):
        tmp_folders.append(folder)

    # find all folders in the working dir and append them to the working folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/working/') if
                   os.path.isdir(os.path.join(f'{test_folder_path}/working/', objects))):
        working_folders.append(folder)

    assert tmp_folders[0] == 'working'
    assert len(tmp_folders) == 1
    assert working_folders[0] == 'FolderToBeMoved'
    assert len(working_folders) == 1

    teardown_test_files()
Example #4
    def __init__(self):
        """Generic initialization code."""

        # session folder (create if missing)
        self.session_folder = '../sessions'
        create_folder(self.session_folder)

        # log folder (create if missing)
        self.log_folder = f'{self.session_folder}/logs'
        create_folder(self.log_folder)

        # work folder (create and clear)
        self.work_folder = f'{self.session_folder}/{script_name()}/work'
        clear_folder(self.work_folder)

        # configuration engines
        self.config = None
        self.option = None

        # database
        self.database = None
        self.target_db_conn = None

        # parameter driven
        self.dataset_id = ''
        self.table_name = ''

        # since we start logging before reading config/options, log to a known path rather than a dataset-specific path
        log_setup(log_file_name=f'{self.log_folder}/{script_name()}.log')
        log_session_info()
Example #5
    def __init__(self, screen, cfg_game, frame_name='frame', root_path='./'):

        # arguments
        self.screen = screen
        self.cfg_game = cfg_game
        self.frame_name = frame_name
        self.root_path = root_path

        # shortcuts
        self.screen_size = cfg_game['screen_size']
        self.fps = cfg_game['fps']

        # paths
        self.paths = dict(
            (k, self.root_path + v) for k, v in self.cfg_game['paths'].items())

        # delete old data
        delete_files_in_path([self.paths['frame_path']], file_ext='.png')

        # create folder for captured frames
        create_folder(list(self.paths.values()))

        # vars
        self.actual_frame_num = 0
        self.frame_container = []

        # downsample of fps
        self.downsample = 2
        self.downsample_count = 0
Example #6
  def extract_features(self):
    """
    extract mfcc features and save them
    """

    print("\n--feature extraction:")

    # create folder structure
    create_folder(self.feature_folders)

    for i, (set_name, wavs, annos) in enumerate(zip(self.set_names, self.set_audio_files, self.set_annotation_files)):

      print("{}) extract set: {} with label num: {}".format(i, set_name, len(wavs)))

      # examples with splits
      n_examples = int(self.dataset_cfg['n_examples'] * self.dataset_cfg['split_percs'][i])

      # extract data
      x, y, t, index = (
        self.extract_mfcc_data(wavs=wavs, annos=annos, n_examples=n_examples, set_name=set_name)
        if self.feature_params['use_mfcc_features'] else
        self.extract_raw_data(wavs=wavs, annos=annos, n_examples=n_examples, set_name=set_name))

      # add noise if requested
      if self.dataset_cfg['add_noise'] and self.feature_params['use_mfcc_features']: x, y, index = self.add_noise_to_dataset(x, y, index, n_examples)

      # print label stats
      self.label_stats(y)

      # save mfcc data file
      np.savez(self.feature_files[i], x=x, y=y, t=t, index=index, params=self.feature_params)
      print("--save data to: ", self.feature_files[i])
Example #7
    def run(self):
        """
        Options
        --onetime[=1] run once, then exit; use if called by an external scheduler.
        --nowait[=1] run immediately without waiting for scheduler to determine execution.
        """

        # exit if __init__ didn't find a valid project file
        if not self.project_name:
            return

        # display application banner
        app_name = script_name()
        print(f'UDP {app_name.title()} {self.project_name}')
        copyright_year = f'{now():%Y}'
        copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
        print(copyright_message)

        # make sure root sessions folder exists
        create_folder(self.session_folder)

        # since we start logging before reading config/options, log to a known path rather than a dataset-specific path
        log_setup(
            log_file_name=f'{self.session_folder}/{self.project_name}.log')
        log_session_info()

        # common setup
        self.setup()

        # application specific startup logic
        self.start()

        # scheduling behavior based on --onetime, --nowait option
        if self.option('onetime') == '1':
            # one-time run; use when this script is being called by an external scheduler
            logger.info('Option(onetime=1): executing once')
            self.main()
        else:
            if self.option('nowait') == '1':
                # no-wait option; execute immediately without waiting for scheduler to initiate
                logger.info(
                    'Option(nowait=1): executing immediately, then following regular schedule'
                )
                self.main()

            # standard wait for scheduled time slot and run logic
            while True:
                self.progress_message('waiting for next job ...')
                if self.schedule.wait():
                    self.main()
                    if self.option('scheduled_onetime') == '1':
                        logger.info(
                            'Option(scheduled_onetime=1): ran once at first scheduled timeslot'
                        )
                        break
                else:
                    break

        self.cleanup()
Example #8
    def __init__(self,
                 classifier,
                 mic_params,
                 is_audio_record=False,
                 root_path='./'):

        # arguments
        self.classifier = classifier
        self.mic_params = mic_params
        self.is_audio_record = is_audio_record
        self.root_path = root_path

        # plot path
        self.plot_path = self.root_path + self.mic_params['plot_path']

        # create folder for plot path
        create_folder([self.plot_path])

        # shortcuts
        self.feature_params = classifier.feature_params

        # feature extractor
        self.feature_extractor = FeatureExtractor(self.feature_params)

        # windowing params
        self.N, self.hop = self.feature_extractor.N, self.feature_extractor.hop

        # queue
        self.q = queue.Queue()

        # collector
        self.collector = Collector(
            N=self.N,
            hop=self.hop,
            frame_size=self.feature_params['frame_size'],
            update_size=self.mic_params['update_size'],
            frames_post=self.mic_params['frames_post'],
            is_audio_record=self.is_audio_record)

        # device
        self.device = sd.default.device[0] if not self.mic_params[
            'select_device'] else self.mic_params['device']

        # determine downsample
        self.downsample = self.mic_params['fs_device'] // self.feature_params[
            'fs']

        # get input devices
        self.input_dev_dict = self.extract_devices()

        # show devices
        print("\ndevice list: \n", sd.query_devices())
        print("\ninput devs: ", self.input_dev_dict.keys())

        # stream
        self.stream = None

        # change device flag
        self.change_device_flag = False
Example #9
    def create_ml_folders(self):
        """
		create all necessary folders for ml
		"""

        # create folder
        create_folder(
            list(self.cfg['ml']['paths'].values()) + [self.model_path])
Example #10
def saveImage(redis_service, host, key):
    image = redis_service.rpop(key)
    if image:
        print('Server: %s, fetched image info: %s' % (host, image))
        image = json.loads(image)
        common.create_folder(image_dir + '/img/' + image['mainid'])
        downloadImage(image['filename'], image['url'])
    else:
        print('Server: %s image data is empty' % host)
Example #11
    def create(self, resource):
        """Create a container within resource's storage account."""
        self._load_resource(resource)
        blob_folder = self._blob_folder()
        if is_folder(blob_folder):
            logger.warning(self._context("Container already exists"))
        else:
            # create new container
            logger.info(self._context("Creating container"))
            create_folder(blob_folder)

        self.disconnect()
        return is_folder(blob_folder)
Example #12
    def create_audio_dataset_folders(self):
        """
		create all necessary folders for audio dataset
		"""

        # mfcc paths for output
        mfcc_paths = [
            p + self.param_path_audio_dataset
            for p in list(self.cfg['audio_dataset']['data_paths'].values())
        ]

        # create folder
        create_folder(self.wav_folders_audio_dataset + mfcc_paths +
                      list(self.cfg['audio_dataset']['plot_paths'].values()))
Example #13
    def create_my_recording_folders(self):
        """
		create all necessary folders for my recordings
		"""

        # output path
        output_path = self.cfg['my_recordings'][
            'out_path_root'] + self.param_path_my_recordings

        # create folder
        create_folder([
            self.cfg['my_recordings']['plot_path'],
            self.cfg['my_recordings']['wav_path'], output_path
        ])
Example #14
def cumulative_chart_to_file(data, out_folder, category_to_skip):
    c.create_folder(join(out_folder, CHART_FOLDER))
    out_and_in = data.collapsed_data_by_month(category_to_skip)
    diff = map(lambda (a,b) : -a + b, out_and_in)
    cumulative = [diff[0]]
    for i in range(1,len(diff)):
        cumulative.append(cumulative[i - 1] +  diff[i])
    transposed = zip(*out_and_in)
    m_in = list(transposed[1])
    m_out = map(lambda x : -x,list(transposed[0]))
    file_name = draw_chart(
                          TOTAL_CHART_NAME, data.months,
                          (m_in,m_out,diff,cumulative),
                          ('in', 'out', 'diff', TOTAL_LABEL),
                          out_folder, cstyle=LightSolarizedStyle)
    return join(CHART_FOLDER,file_name)
Example #15
def download_prepack_file(build_number, language):
    build_dst = const.HF_Working_Folder + "\\Build\\" + language + "\\B" + build_number
    filename = 'Prepack.zip'

    if not os.path.exists(build_dst + "\\" + filename):
        if not os.path.exists(build_dst):
            build_dst = common.create_folder(build_dst)
        prepack_download_from_ftp(filename, build_dst, build_number, language)
Example #16
    def run(self):
        common.create_folder(config.image_dir + '/img')

        while True:
            image = self.redis.rpop('images')
            if image:
                image = json.loads(image)
                print(image)
                common.create_folder(config.image_dir + '/img/' +
                                     image['mainid'])
                #common.create_folder(config.image_dir + '/' + image['mainid'])

                self.__saveImage(image['filename'], image['url'])
                time.sleep(0.5)
            else:
                time.sleep(10)
            pass
Example #17
def test_create_folder():
    setup_test_files()

    create_folder(f'{test_folder_path}/createdfolder')
    # set up folders list
    folders = []
    # find all folders in the tmp dir and append them to the folders list
    for folder in (objects for objects in os.listdir(f'{test_folder_path}/') if os.path.isdir(os.path.join(f'{test_folder_path}/', objects))):
        folders.append(folder)
    # sort the list alphabetically
    folders.sort()

    # assert that the directories we expect exist
    assert folders[0] == 'createdfolder'
    assert folders[1] == 'working'
    assert len(folders) == 2

    teardown_test_files()
Example #18
    def __init__(self, tenant):
        # tenant
        self.tenant = force_local_path(tenant.lower())

        # make sure we have a tenant
        create_folder(self.tenant)

        self.ad_file_name = force_file_ext(f'{self.tenant}/ad', 'json')

        # start in a non-authenticated state
        self.identity = None
        self.is_authenticated = False

        # load our directory
        if is_file(self.ad_file_name):
            self._load()
        else:
            self.identities = dict()
Example #19
def setup_test_files():
    # ensure clean (empty) ../tmp folder
    teardown_test_files()

    # set up files
    create_folder(test_folder_path)
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    readwrite_file_name = f'{test_folder_path}/readwrite.txt'

    # leverage common's save_text() to create the files
    save_text(readwrite_file_name, 'Hello world')

    # create a read only file
    save_text(readonly_file_name, 'Hello world')

    # do this type of operation after a file is closed
    os.chmod(readonly_file_name, S_IREAD | S_IRGRP | S_IROTH)

    # create a working dir in tmp folder
    create_folder(f'{test_folder_path}/working')
Example #20
    def put(self, source_file_name, blob_name):
        """Upload source file name to blob."""
        if not is_file(source_file_name):
            warning_message = f"Source file does not exist ({source_file_name})"
            logger.warning(self._context(warning_message, blob_name))
            is_success = False
        else:
            logger.debug(self._context(f"Putting {source_file_name}", blob_name))

            # build blob target file and folder names
            blob_folder = self._blob_folder()
            target_file_name = f"{blob_folder}/{blob_name}"
            target_folder = just_path(target_file_name)

            # make sure the blob's target folder exists
            create_folder(target_folder)

            # then copy source file to blob container
            copy_file_if_exists(source_file_name, target_file_name)
            is_success = True
        return is_success
Example #21
	def run(self, *args, **kwargs):
		"""
		Options
		--onetime[=1] run once, then exit; use if called by an external scheduler.
		--nowait[=1] run immediately without waiting for scheduler to determine execution.
		"""

		# make sure root sessions folder exists
		create_folder('../sessions')

		# TODO: We start logging before we read config and options so we don't know datapool or anything else.
		# TODO: We should log to a default app log and then SWITCH LOG file over after we process options and
		# TODO: and config files ??? (2018-09-25)
		log_setup(log_file_name=f'../sessions/{script_name()}.log')
		log_session_info()

		self.setup(*args, **kwargs)
		self.start()

		# scheduling behavior based on --onetime, --nowait option
		if self.option('onetime') == '1':
			# one-time run; use when this script is being called by an external scheduler
			logger.info('Option(onetime=1): executing once')
			self.main()
		else:
			if self.option('nowait') == '1':
				# no-wait option; execute immediately without waiting for scheduler to initiate
				logger.info('Option(nowait=1): executing immediately, then following regular schedule')
				self.main()

			# standard wait for scheduled time slot and run logic
			while True:
				if self.schedule.wait():
					self.main()
				else:
					break

		self.cleanup()
Example #22
def category_chart_to_file(data, out_folder):
    file_names = []
    c.create_folder(join(out_folder, CHART_FOLDER))
    for category in data.categories:
        money = data.collapsed_data_by_month_for(category)
        money_out = zip(*money)[0]
        money_in = zip(*money)[1]
        entries_out = zip(*money)[2]
        entries_in = zip(*money)[3]
        logging.debug("__CHART_FOR " + category + "\n\t" + str(data.months) +
                      "\n\t" + str(money_in) + "\n\t" + str(money_out))
        if all(value == 0 for value in money_in):
            file_name = draw_chart(category, data.months, (money_out,),
                                   (MONEY_OUT_LABEL,), out_folder,
                                   cstyle=LightColorizedStyle,
                                   det_lists=(entries_out,))
        else:
            money_out = map(lambda y : -y, money_out)
            file_name = draw_chart(category, data.months, (money_in, money_out),
                                   (MONEY_IN_LABEL, MONEY_OUT_LABEL), out_folder,
                                   det_lists=(entries_in,entries_out))
        file_names.append(join(CHART_FOLDER, file_name))
    return sorted(file_names)
Example #23
    def setup(self):
        """Generic setup code."""

        # load standard config
        self.config = ConfigSectionKey('../conf', '../local')
        self.config.load('bootstrap.ini', 'bootstrap')
        self.config.load('init.ini')
        self.config.load('connect.ini')

        # load utility specific options using
        # env variable = UDP_<SCRIPT-NAME>; Option() retrieves command line options
        self.option = Option(f'udp_{script_name()}')

        # create/clear work folder
        self.work_folder = f'{self.session_folder}/{script_name()}/work'
        create_folder(self.work_folder)

        # display application banner
        # TODO: This should be a banner method()
        print(f'UDP {script_name()} utility')
        print(f'Alterra Unified Data Platform')
        copyright_year = f'{now():%Y}'
        copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
        print(copyright_message)
Example #24
    def generate_rows(self,
                      dataset_schema=None,
                      dataset_partitioning=None,
                      partition_id=None,
                      records_limit=-1):

        path_datadir_tmp = os.getenv("DIP_HOME") + '/tmp/'
        FOLDER_NAME = 'tmp_census_us_' + self.P_CENSUS_CONTENT

        P_CENSUS_TYPE = self.P_CENSUS_CONTENT[:3]
        CENSUS_TYPE = str(
            census_resources.dict_vintage_[self.P_CENSUS_CONTENT[:3]])

        fields_list = self.P_census_fields.split(',')

        #----------------------------------------- BASE FOLDER

        logger.info('1/6 Creating base folders...')

        common.create_folder(path_datadir_tmp, FOLDER_NAME, False)

        common.create_folder(path_datadir_tmp + '/' + FOLDER_NAME + '/',
                             self.P_CENSUS_LEVEL, False)

        #----------------------------------------- SOURCE HARVESTER

        state_list_ = self.P_state_list_str.split(',')

        state_conversion = common.state_to_2letters_format(
            self.P_STATES_TYPE_NAME, state_list_)

        state_list = state_conversion[0]
        state_list_rejected = state_conversion[1]
        dict_states = state_conversion[2]

        s_found = len(state_list)
        s_rejected = len(state_list_rejected)

        logger.info('----------------------------------------')
        logger.info('First diagnostic on input dataset')
        logger.info('----------------------------------------')
        if s_found > 0:
            logger.info(
                'States expected to be processed if enough records for feature selection:'
            )
            logger.info(state_list)
            logger.info('States rejected:')
            if s_rejected < 60:
                logger.info(state_list_rejected)
            else:
                logger.info(
                    '...too many elements rejected for displaying it in the log...'
                )

            if self.P_USE_PREVIOUS_SOURCES is False:
                logger.info('2/6 Collecting US Census Data...')

            else:
                logger.info('2/6 Re using US Census Data if available...')

            sources_collector = common.us_census_source_collector(
                self.P_USE_PREVIOUS_SOURCES, P_CENSUS_TYPE,
                self.P_CENSUS_CONTENT, self.P_CENSUS_LEVEL, path_datadir_tmp,
                FOLDER_NAME, state_list, dict_states)

            sumlevel_val = sources_collector[0]
            fdef_dir = sources_collector[1]
            geo_header_file = sources_collector[2]
            dict_pattern_files = sources_collector[3]

            geo_header_file_dir = fdef_dir + '/' + geo_header_file
            geo_header = pd.read_excel(geo_header_file_dir,
                                       sheet_name=0,
                                       header=0)  #sheetname

            census_level_code_len = census_resources.dict_level_corresp['v1'][
                self.P_CENSUS_LEVEL]['code_len']

            logger.info('4/6 Generating census...')

            final_output_df = pd.DataFrame()

            for state in state_list:

                logger.info('Processing this state: %s' % (state))

                state_dir = path_datadir_tmp + FOLDER_NAME + '/' + state

                if self.P_CENSUS_LEVEL in ('TRACT', 'BLOCK_GROUP'):
                    ziptocollect = dict_pattern_files['v1']['TB']
                    state_dir_level = state_dir + '/' + 'TRACT_BG_SEG'

                else:
                    ziptocollect = dict_pattern_files['v1']['OT']
                    state_dir_level = state_dir + '/' + 'NO_TRACT_BG_SEG'

                ustate = state.upper()

                state_name = dict_states[state]['attributes'][
                    'state_fullname_w1']
                state_number = dict_states[state]['attributes'][
                    'state_2digits']

                vint = census_resources.dict_vintage_[P_CENSUS_TYPE][
                    self.P_CENSUS_CONTENT]
                master_segment_file = state_dir_level + '/' + vint[
                    'master_segment_file_pattern'] + vint[
                        'vintage_pattern'] + state + '.csv'

                geo_source_df = pd.read_csv(master_segment_file,
                                            sep=',',
                                            header=None,
                                            names=geo_header.columns)
                geo_level_df = geo_source_df[geo_source_df['SUMLEVEL'].isin(
                    sumlevel_val)].copy()
                geo_level_df['GEOID_DKU'] = geo_level_df['GEOID'].map(
                    lambda x: x.split('US')[1])

                geo_level_df[self.P_CENSUS_LEVEL] = geo_level_df[
                    'GEOID_DKU'].map(lambda x: x[:census_level_code_len])

                keep_cols = [
                    'FILEID', 'SUMLEVEL', 'GEOID_DKU', 'STUSAB', 'LOGRECNO'
                ]
                geo_level_df = geo_level_df[keep_cols]
                geo_level_df['STUSAB'] = geo_level_df['STUSAB'].map(
                    lambda x: x.lower())  ## basically the state lower

                del geo_level_df['FILEID']
                del geo_level_df['SUMLEVEL']

                ### added
                n = 0
                for fr in os.listdir(state_dir_level):
                    if fr.startswith(
                            vint['segments_estimations_files_pattern']):
                        n += 1

                segment_list = []
                for i in range(1, n + 1):
                    if i < 10:
                        segment_list.append('000' + str(i))
                    if i in range(10, 100):
                        segment_list.append('00' + str(i))
                    if i >= 100:
                        segment_list.append('0' + str(i))

                nb_segments = len(segment_list)

                i = 1
                for segment_number in segment_list:

                    i = i + 1
                    logger.info('Processing segment: %s/%s' % (i, nb_segments))

                    template_fields_def = census_resources.dict_vintage_[
                        P_CENSUS_TYPE][
                            self.P_CENSUS_CONTENT]['fields_definition']

                    seq_folder_name = template_fields_def['folder_name']

                    ## Some vintages (e.g. ACS52013) do not ship the template inside its own folder.
                    ## If no template folder is defined, fall back to the geo header template folder instead.
                    if seq_folder_name == '':
                        seq_folder_name = template_fields_def[
                            'geo_header_template_folder_name']

                    try:
                        HEADER_PATH_FILE = fdef_dir + '/' + seq_folder_name + '/Seq' + str(
                            int(segment_number)
                        ) + template_fields_def['seq_files_extension']
                        header_df = pd.read_excel(
                            HEADER_PATH_FILE,
                            sheet_name=0)  ### 0 = 'E' #sheetname

                    except:
                        HEADER_PATH_FILE = fdef_dir + '/' + seq_folder_name + '/seq' + str(
                            int(segment_number)
                        ) + template_fields_def['seq_files_extension']
                        header_df = pd.read_excel(
                            HEADER_PATH_FILE,
                            sheet_name=0)  ### 0 = 'E' #sheetname

                    ### Adjust the header to fit what we need.
                    kh_list = [
                        'FILEID', 'FILETYPE', 'STUSAB', 'CHARITER', 'SEQUENCE',
                        'LOGRECNO'
                    ]
                    f_list = [x for x in header_df.columns if x not in kh_list]
                    E_list = [x + 'E' for x in f_list]
                    newcolz_list = kh_list + E_list

                    t_ = [c for c in newcolz_list if c in fields_list]

                    if len(t_) > 0:

                        SEGMENT_PATH_FILE = state_dir_level + '/' + vint[
                            'segments_estimations_files_pattern'] + vint[
                                'vintage_pattern'] + state + segment_number + '000.txt'
                        segment_df = pd.read_csv(SEGMENT_PATH_FILE,
                                                 sep=',',
                                                 names=newcolz_list,
                                                 low_memory=False)

                        out_list = kh_list + t_
                        out_list.remove('FILEID')
                        out_list.remove('FILETYPE')
                        out_list.remove('CHARITER')
                        out_list.remove('SEQUENCE')

                        segment_df = segment_df[out_list]

                        geo_level_df = pd.merge(
                            left=geo_level_df,
                            right=segment_df,
                            how='inner',
                            left_on=['STUSAB', 'LOGRECNO'],
                            right_on=['STUSAB', 'LOGRECNO'])

                logger.info('-------------- volumes check------------------')
                logger.info(geo_level_df.groupby('STUSAB').size())
                logger.info('Check Tallies here :')
                logger.info(
                    'https://www.census.gov/geo/maps-data/data/tallies/tractblock.html'
                )
                logger.info('----------------------------------------------')

                #del geo_level_df['STUSAB']
                del geo_level_df['LOGRECNO']

                if self.P_STATES_TYPE_NAME != 'state_2letters':
                    geo_level_df[self.P_STATES_TYPE_NAME] = dict_states[state][
                        'attributes'][self.P_STATES_TYPE_NAME]

                logger.info('5/6 Building final output...')
                final_output_df = pd.concat((final_output_df, geo_level_df),
                                            axis=0)

            if self.P_DELETE_US_CENSUS_SOURCES is True:

                logger.info('6/6 Removing US Census temp data from: %s' %
                            (path_datadir_tmp + FOLDER_NAME))
                cmd = "rm -rf %s" % (path_datadir_tmp + FOLDER_NAME)
                os.system(cmd)

            else:
                logger.info('6/6 Keeping US Census data sources in: %s' %
                            (path_datadir_tmp + FOLDER_NAME))
                for f in os.listdir(path_datadir_tmp + FOLDER_NAME):
                    if not f.endswith('.zip'):
                        cmd = "rm -rf %s" % (path_datadir_tmp + FOLDER_NAME +
                                             '/' + f)
                        os.system(cmd)

            for i, line in final_output_df.iterrows():
                yield line.to_dict()

        else:
            logger.info('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
            logger.info(
                'US Census CANNOT be built, no states available in your dataset...'
            )
            logger.info('Check the following settings :')
            logger.info(
                '-> are the states in the right format regarding the plugin set by the user ?'
            )
            logger.info('-> is the column really containing states ?')
            logger.info('----------------------------------------')
Example #25
    def main(self):
        db = None
        try:
            # get job id and table history
            job_history_file_name = f'{self.state_folder_name}/capture.job'
            job_history = JobHistory(job_history_file_name)
            job_history.load()
            job_id = job_history.job_id
            self.job_id = job_id
            logger.info(f'\nCapture job {job_id} for {self.namespace} ...')

            # track job (and table) stats
            self.stats = Stats(f'{self.work_folder_name}/job.log',
                               namespace=self.namespace,
                               job_id=job_id)
            self.stats.start('capture', 'job')

            # track overall job row count and file size
            self.job_row_count = 0
            self.job_file_size = 0

            # create/clear job folders
            create_folder(self.state_folder_name)
            clear_folder(self.work_folder_name)
            clear_folder(self.publish_folder_name)

            # connect to source database
            db = None
            db_engine = None
            if self.database.platform == 'postgresql':
                db = database.PostgreSQL(self.database)
                db_engine = database.Database('postgresql', db.conn)

            elif self.database.platform == 'mssql':
                db = database.MSSQL(self.database)
                db_engine = database.Database('mssql', db.conn)

            # cursor = db.conn.cursor()

            # determine current timestamp for this job's run

            # get current_timestamp() from source database with step back and fast forward logic
            current_timestamp = self.current_timestamp(db_engine)

            # process all tables
            self.stats.start('extract', 'step')
            for table_name, table_object in self.table_config.sections.items():
                table_history = job_history.get_table_history(table_name)
                self.process_table(db, db_engine, self.database.schema,
                                   table_name, table_object, table_history,
                                   current_timestamp)
            self.stats.stop('extract', self.job_row_count, self.job_file_size)

            # save interim job stats to work_folder before compressing this folder
            self.stats.stop('capture', self.job_row_count, self.job_file_size)
            self.stats.save()

            # compress work_folder files to publish_folder zip file
            self.compress_work_folder()

            # upload publish_folder zip file
            self.upload_to_objectstore()

            # save final stats for complete job run
            self.stats.stop('capture', self.job_row_count, self.job_file_size)
            self.stats.save(f'{self.state_folder_name}/last_job.log')
            self.stats.save()

            # update job_id and table histories
            job_history.save()

            # compress capture_state and save to capture objectstore for recovery
            self.save_recovery_state_file()

            # update schedule's poll message
            last_job_info = f'last job {self.job_id} on {datetime.datetime.now():%Y-%m-%d %H:%M}'
            schedule_info = f'schedule: {self.schedule}'
            self.schedule.poll_message = f'{script_name()}({self.namespace}), {last_job_info}, {schedule_info}'

        # force unhandled exceptions to be exposed
        except Exception:
            logger.exception('Unexpected exception')
            raise

        finally:
            # explicitly close database connection when finished with job
            with contextlib.suppress(Exception):
                db.conn.close()
Example #26
    else:
        print('Server: %s image data is empty' % host)


def downloadImage(filename, url):
    try:
        filename = image_dir + filename
        print(url)
        request.urlretrieve(url, filename)
        pass
    except Exception as e:
        print(e)


global image_dir
image_dir = config.image_dir
# create folder
common.create_folder(image_dir + '/img')

threads = []

for config in config.redis_services:
    thread = threading.Thread(target=getImage, args=(config, ))
    threads.append(thread)

for t in threads:
    t.setDaemon(True)
    t.start()

t.join()
Example #27
from pathlib import Path
import os
import numpy as np

from renderer import PbrtRenderer
from common import create_folder
from project_path import root_path

if __name__ == '__main__':
    # Create a folder to store the information.
    output_folder = Path('bunny')
    create_folder(output_folder)

    # The asset folder.
    asset_folder = Path(root_path) / 'asset'

    # Create the render.
    options = {
        'file_name': str(output_folder / 'demo.png'),
        'light_map': 'uffizi-large.exr',
        'sample': 4,
        'max_depth': 4,
        'camera_pos': (0, -2, 0.8),
        'camera_lookat': (0, 0, 0),
        'camera_up': (0, 0, 1),
    }
    renderer = PbrtRenderer(options)

    # Add the bunny to the scene.
    renderer.add_tri_mesh(asset_folder / 'mesh/bunny.obj',
                          transforms=[
Example #28
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import requests
#import time
from dataiku.customrecipe import *
import sys
import re
import geocoder_utils
import common
import os
import logging  # used by logging.info() below
import dataiku  # used by dataiku.get_custom_variables() and dataiku.Dataset() below

logging.info('1/6 Creating base folder...')
path_datadir_tmp = dataiku.get_custom_variables()["dip.home"] + '/tmp/'
P_CENSUS_CONTENT = 'geocoder'
FOLDER_NAME = 'tmp_census_us_' + P_CENSUS_CONTENT
common.create_folder(path_datadir_tmp, FOLDER_NAME, True)

input_name = get_input_names_for_role('input')[0]

output_ = get_output_names_for_role('output')[0]
output_dataset = dataiku.Dataset(output_)

P_COL_STREET = get_recipe_config()['p_col_street']
P_COL_CITY = get_recipe_config()['p_col_city']
P_COL_STATE = get_recipe_config()['p_col_state']
P_COL_ZIPCODE = get_recipe_config()['p_col_zipcode']

P_BENCHMARK = get_recipe_config()['p_benchmark']
P_VINTAGE = get_recipe_config()['p_vintage']

if P_BENCHMARK == "9":
Example #29
    def __init__(self,
                 cfg_ml,
                 audio_dataset,
                 batch_archive,
                 net_handler,
                 sub_model_path=None,
                 encoder_label='',
                 root_path='./'):

        # arguments
        self.cfg_ml = cfg_ml
        self.audio_dataset = audio_dataset
        self.batch_archive = batch_archive
        self.net_handler = net_handler
        self.sub_model_path = sub_model_path
        self.encoder_label = encoder_label
        self.root_path = root_path

        # paths
        self.paths = dict(
            (k, self.root_path + v) for k, v in self.cfg_ml['paths'].items())

        # param path ml
        self.param_path_ml = 'bs-{}_it-{}_lr-{}/'.format(
            self.cfg_ml['train_params']['batch_size'],
            self.cfg_ml['train_params']['num_epochs'],
            str(self.cfg_ml['train_params']['lr']).replace('.', 'p'))

        # adv param path
        self.adv_params_path = 'l{}p{}d{}_itl-{}_itp-{}/'.format(
            int(self.cfg_ml['adv_params']['label_train']),
            int(self.cfg_ml['adv_params']['pre_train']),
            int(self.cfg_ml['adv_params']['use_decoder_weights']),
            self.cfg_ml['adv_params']['num_epochs_label'],
            self.cfg_ml['adv_params']['num_epochs_pre'])

        # model path
        self.model_path = self.paths['model'] + self.cfg_ml[
            'nn_arch'] + '/' + self.audio_dataset.param_path + self.param_path_ml

        # create model path before (useful for sub model path changes with ../)
        create_folder([self.model_path])

        # sub dir
        if self.sub_model_path is not None:

            # add param
            if self.sub_model_path.find(cfg_ml['conv_folder']) != -1:
                self.model_path = self.model_path + self.sub_model_path + self.adv_params_path

        # new sub dir for encoder label
        if len(self.encoder_label):
            self.model_path = self.model_path + encoder_label + '/'

        # model path folders
        self.model_path_folders = dict(
            (k, self.model_path + v)
            for k, v in self.cfg_ml['model_path_folders'].items())

        # model file
        self.model_files = [
            self.model_path + model_name + '_' + self.cfg_ml['model_file_name']
            for model_name, v in net_handler.models.items()
        ]
        self.model_pre_files = [
            self.paths['model_pre'] + model_name + '_' + '{}_c-{}.pth'.format(
                self.cfg_ml['nn_arch'], self.batch_archive.n_classes)
            for model_name, v in net_handler.models.items()
        ]

        # encoder decoder available
        enc, dec = net_handler.get_nn_arch_has_conv_coder()
        self.encoder_model_file = self.model_path + self.cfg_ml[
            'encoder_model_file_name'] if enc else None
        self.decoder_model_file = self.model_path + self.cfg_ml[
            'decoder_model_file_name'] if dec else None

        # params and metrics files
        self.params_file = self.model_path + self.cfg_ml['params_file_name']
        self.metrics_file = self.model_path + self.cfg_ml['metrics_file_name']
        self.info_file = self.model_path + self.cfg_ml['info_file_name']
        self.score_file = self.model_path + self.cfg_ml['score_file_name']

        # image list (for adversarial)
        self.img_list = []

        # create ml folders
        create_folder(
            list(self.paths.values()) + [self.model_path] +
            list(self.model_path_folders.values()))

        # config
        logging.basicConfig(filename=self.paths['log'] + 'ml.log',
                            level=logging.INFO,
                            format='%(asctime)s %(message)s')

        # disable unwanted logs
        logging.getLogger('matplotlib.font_manager').disabled = True
        logging.getLogger('matplotlib.colorbar').disabled = True
        logging.getLogger('matplotlib.animation').disabled = True

        # load pre trained model
        if self.cfg_ml['load_pre_model']:
            self.net_handler.load_models(model_files=self.model_pre_files)
Example #30
    def main(self):
        db = None
        try:
            # track dataset name for naming generated files and folders
            self.dataset_name = self.namespace.dataset

            # get job id and table history
            job_history_file_name = f'{self.state_folder}/capture.job'
            job_history = JobHistory(job_history_file_name)
            job_history.load()
            job_id = job_history.job_id
            self.job_id = job_id
            logger.info(f'\nCapture job {job_id} for {self.dataset_name} ...')
            self.progress_message(f'starting job {job_id} ...')

            # track job (and table) metrics
            dataset_id = self.namespace.dataset
            self.events = Events(f'{self.work_folder}/job.log',
                                 dataset_id=dataset_id,
                                 job_id=job_id)
            self.events.start('capture', 'job')

            # track overall job row count and file size
            self.job_row_count = 0
            self.job_data_size = 0

            # create/clear job folders
            create_folder(self.state_folder)
            clear_folder(self.work_folder)
            clear_folder(self.publish_folder)

            # connect to source database
            self.database = self.config(self.project.database_source)
            if self.database.platform == 'postgresql':
                db = database.PostgreSQL(self.database)
                db_engine = database.Database('postgresql', db.conn)
            elif self.database.platform == 'mssql':
                db = database.MSSQL(self.database)
                db_engine = database.Database('mssql', db.conn)
            else:
                raise NotImplementedError(
                    f'Unknown database platform ({self.database.platform})')

            # determine current timestamp for this job's run

            # get current_timestamp() from source database with step back and fast forward logic
            current_timestamp = self.current_timestamp(db_engine)

            # process all tables
            self.events.start('extract', 'step')

            # build dict of table objects indexed by table name
            self.tables = dict()
            for section_name, section_object in self.config.sections.items():
                if section_name.startswith('table:'):
                    table_name = section_name.partition(':')[2]
                    self.tables[table_name] = section_object

            # extract data from each table
            for table_name, table_object in self.tables.items():
                table_history = job_history.get_table_history(table_name)

                # get current_sequence from source database
                if table_object.cdc == 'sequence':
                    current_sequence = db_engine.current_sequence(table_name)
                else:
                    current_sequence = 0

                self.process_table(db, db_engine, self.database.schema,
                                   table_name, table_object, table_history,
                                   current_timestamp, current_sequence)
            self.events.stop('extract', self.job_row_count, self.job_data_size)

            # save interim job metrics to work_folder before compressing this folder
            self.events.stop('capture', self.job_row_count, self.job_data_size)
            self.events.save()

            # compress work_folder files to publish_folder zip file
            self.compress_work_folder()

            # upload publish_folder zip file
            self.upload_to_blobstore()

            # save final metrics for complete job run
            self.events.stop('capture', self.job_row_count, self.job_data_size)
            self.events.save(f'{self.state_folder}/last_job.log')
            self.events.save()

            # update job_id and table histories
            if not self.option('notransfer'):
                # only save job history if we're transferring data to landing
                job_history.save()

            # compress capture_state and save to capture blobstore for recovery
            self.save_recovery_state_file()

            # update schedule's poll message
            last_job_info = f'last job {self.job_id} on {datetime.datetime.now():%Y-%m-%d %H:%M}'
            schedule_info = f'schedule: {self.schedule}'
            self.schedule.poll_message = f'{script_name()}({self.dataset_name}), {last_job_info}, {schedule_info}'

        # force unhandled exceptions to be exposed
        except Exception:
            logger.exception('Unexpected exception')
            raise

        finally:
            # explicitly close database connection when finished with job
            with contextlib.suppress(Exception):
                db.conn.close()
Example #31
    ]
    df2 = df[keep_columns_list]

    df2_unique = df2.groupby(P_CENSUS_LEVEL_COLUMN).size().reset_index()
    df2_unique = df2_unique[[P_CENSUS_LEVEL_COLUMN]]
    df2_unique[P_CENSUS_LEVEL_COLUMN] = df2_unique[
        P_CENSUS_LEVEL_COLUMN].astype('int64')

    df2n = df2.groupby(P_COLUMN_STATES).size().reset_index()
    df2n.rename(columns={0: 'nb'}, inplace=True)

    #----------------------------------------- BASE FOLDER

    print '1/6 Creating base folders...'

    common.create_folder(path_datadir_tmp, FOLDER_NAME, False)

    common.create_folder(path_datadir_tmp + '/' + FOLDER_NAME + '/',
                         P_CENSUS_LEVEL, False)

    #----------------------------------------- SOURCE HARVESTER

    if P_USE_PREVIOUS_SOURCES is False:
        print '2/6 Collecting US Census Data...'

    else:
        print '2/6 Re using US Census Data if available...'

    sources_collector = common.us_census_source_collector(
        P_USE_PREVIOUS_SOURCES, P_CENSUS_TYPE, P_CENSUS_CONTENT,
        P_CENSUS_LEVEL, path_datadir_tmp, FOLDER_NAME, state_list, dict_states)
Example #32
    import yaml
    import matplotlib.pyplot as plt
    import soundfile

    from plots import plot_waveform
    from common import create_folder
    from path_collector import PathCollector

    # yaml config file
    cfg = yaml.safe_load(open("./config.yaml"))

    # init path collector
    path_coll = PathCollector(cfg)

    # create folder
    create_folder([cfg['mic_params']['plot_path']])

    # window and hop size
    N = int(cfg['feature_params']['N_s'] * cfg['feature_params']['fs'])
    hop = int(cfg['feature_params']['hop_s'] * cfg['feature_params']['fs'])

    # classifier
    classifier = Classifier(path_coll=path_coll, verbose=True)

    # create mic instance
    mic = Mic(classifier=classifier,
              feature_params=cfg['feature_params'],
              mic_params=cfg['mic_params'],
              is_audio_record=True)