def extract_semi_structured_addresses(address_file, segmentation_input_file, output_file):
    """Convert pickled addresses to semi-structured form and pickle the result.

    Loads segmentation data from ``segmentation_input_file`` and installs it
    through ``segment.set_segmentation_data`` (with ``factor=100``), reads the
    addresses from ``address_file``, applies ``semi_structured_address`` to
    each of them, and pickles the resulting list to ``output_file``.

    Args:
        address_file: Path handed to
            ``segment.read_pickle_for_segmentation_file``.
        segmentation_input_file: Path handed to
            ``segment.load_data_from_pickle_file``.
        output_file: Path the converted list is pickled to; the file is
            opened in ``'wb'`` mode, so existing content is replaced.
    """
    segment_data = segment.load_data_from_pickle_file(segmentation_input_file)
    segment.set_segmentation_data(segment_data, factor=100)

    address_list = segment.read_pickle_for_segmentation_file(address_file)
    # Build a concrete list: on Python 3 ``map`` returns a lazy iterator,
    # which ``pickle.dump`` rejects with a TypeError. On Python 2 the result
    # is the same list ``map`` would have produced.
    address_list = [semi_structured_address(address) for address in address_list]

    with open(output_file, 'wb') as f:
        pickle.dump(address_list, f)
def extract_semi_structured_addresses(address_file, segmentation_input_file, output_file):
    """Pickle the semi-structured form of every address in ``address_file``.

    Steps, in order:
      1. Load segmentation data from ``segmentation_input_file`` and register
         it with ``segment.set_segmentation_data`` using ``factor=100``.
      2. Read the addresses from ``address_file``.
      3. Apply ``semi_structured_address`` to each address.
      4. Pickle the resulting list to ``output_file``.

    Args:
        address_file: Path given to
            ``segment.read_pickle_for_segmentation_file``.
        segmentation_input_file: Path given to
            ``segment.load_data_from_pickle_file``.
        output_file: Destination path, opened in ``'wb'`` mode (any existing
            file is overwritten).
    """
    segment_data = segment.load_data_from_pickle_file(segmentation_input_file)
    segment.set_segmentation_data(segment_data, factor=100)

    raw_addresses = segment.read_pickle_for_segmentation_file(address_file)
    # A list comprehension instead of a bare ``map``: Python 3's ``map`` is a
    # lazy iterator that cannot be pickled (TypeError), whereas a list pickles
    # on both Python 2 and Python 3.
    address_list = [semi_structured_address(address) for address in raw_addresses]

    with open(output_file, 'wb') as f:
        pickle.dump(address_list, f)
def create_data_for_processing(feeder_number):
    """Create the intermediate data files for a feeder that do not exist yet.

    Every output is produced only when its target path is not already an
    existing file, so files left behind by an earlier run are reused rather
    than regenerated. The feeder addresses are always re-read from
    ``ADDRESSES_FROM_SINGLE_FEEDER``.

    Args:
        feeder_number: Feeder identifier; its position in
            ``ALL_FEEDERS_LIST`` is what gets passed to
            ``extract_data_for_feeder``.
    """
    # Addresses for the single feeder.
    if not os.path.isfile(ADDRESSES_FROM_SINGLE_FEEDER):
        feeder_index = ALL_FEEDERS_LIST.index(feeder_number)
        extract_data_for_feeder(feeder_index, ADDRESSES_FROM_SINGLE_FEEDER)

    feeder_addresses = segment.read_pickle_for_segmentation_file(
        ADDRESSES_FROM_SINGLE_FEEDER)

    # Word segmentation data.
    if not os.path.isfile(DATA_FILE_FOR_WORD_SEGMENTATION):
        segment.extract_segmentation_file_from_text(
            feeder_addresses, DATA_FILE_FOR_WORD_SEGMENTATION, 1, 8)

    # Segmentation data built with the ``metaphone`` flag enabled.
    if not os.path.isfile(DATA_FILE_FOR_META_SEGMENTATION):
        segment.extract_segmentation_file_from_text(
            feeder_addresses, DATA_FILE_FOR_META_SEGMENTATION, 1, 8,
            metaphone=True)

    # Semi-structured address list, derived from the word segmentation data.
    if not os.path.isfile(SEMI_STRUCTURED_ADDRESS_LIST):
        extract_semi_structured_addresses(
            ADDRESSES_FROM_SINGLE_FEEDER,
            DATA_FILE_FOR_WORD_SEGMENTATION,
            SEMI_STRUCTURED_ADDRESS_LIST)
def create_data_for_processing(feeder_number):
    """Generate any missing on-disk inputs needed to process one feeder.

    Four files are handled in sequence; each generation step is skipped when
    ``os.path.isfile`` reports that the target already exists. The addresses
    are loaded from ``ADDRESSES_FROM_SINGLE_FEEDER`` on every call, whether
    or not that file had to be created first.

    Args:
        feeder_number: Value looked up in ``ALL_FEEDERS_LIST``; the resulting
            index is forwarded to ``extract_data_for_feeder``.
    """
    have_addresses = os.path.isfile(ADDRESSES_FROM_SINGLE_FEEDER)
    if not have_addresses:
        extract_data_for_feeder(
            ALL_FEEDERS_LIST.index(feeder_number),
            ADDRESSES_FROM_SINGLE_FEEDER,
        )

    loaded_addresses = segment.read_pickle_for_segmentation_file(
        ADDRESSES_FROM_SINGLE_FEEDER
    )

    have_word_data = os.path.isfile(DATA_FILE_FOR_WORD_SEGMENTATION)
    if not have_word_data:
        segment.extract_segmentation_file_from_text(
            loaded_addresses, DATA_FILE_FOR_WORD_SEGMENTATION, 1, 8
        )

    have_meta_data = os.path.isfile(DATA_FILE_FOR_META_SEGMENTATION)
    if not have_meta_data:
        segment.extract_segmentation_file_from_text(
            loaded_addresses, DATA_FILE_FOR_META_SEGMENTATION, 1, 8,
            metaphone=True
        )

    # Checked last, after the word segmentation file it reads has been
    # given the chance to be created above.
    have_semi_structured = os.path.isfile(SEMI_STRUCTURED_ADDRESS_LIST)
    if not have_semi_structured:
        extract_semi_structured_addresses(
            ADDRESSES_FROM_SINGLE_FEEDER,
            DATA_FILE_FOR_WORD_SEGMENTATION,
            SEMI_STRUCTURED_ADDRESS_LIST,
        )