예제 #1
0
def upload(project_id):
    '''
    Upload a csv file to a normalization project and optionally build its
    mini version. (NB: cannot upload directly to a link type project).

    GET:
        - project_id: ID of the normalization project

    POST:

      file: (csv file) A csv to upload to the chosen normalization project
                  NB: the "filename" property will be used to name the file
      json:
        - module_params:
            - make_mini: (default True) Set to False to NOT create a mini version of the file
            - sample_size
            - randomize

    Returns:
        A JSON response holding `run_info` (second value returned by
        `upload_init_data`) and the `project_id` of the project.
    '''
    # Load project
    proj = ESNormalizer(project_id=project_id)
    _, module_params = _parse_request()
    if module_params is None:
        module_params = {}
    make_mini = module_params.get('make_mini', True)  # TODO: can remove ?

    def custom_stream_factory(total_content_length, filename, content_type, content_length=None):
        # Receive each uploaded file into a named temporary file; the
        # arguments are imposed by werkzeug's stream_factory signature.
        tmpfile = tempfile.NamedTemporaryFile('wb+', prefix='flaskapp')
        app.logger.info("start receiving file ... filename => %s", tmpfile.name)
        return tmpfile

    _, _, files = werkzeug.formparser.parse_form_data(
        flask.request.environ, stream_factory=custom_stream_factory)

    try:
        # Upload data
        uploaded = files['file']
        _, run_info = proj.upload_init_data(uploaded.stream, uploaded.filename)

        # Make mini
        if make_mini:
            proj.load_data('INIT', run_info['file_name'])
            proj.make_mini(module_params)

            # Write transformations and log # TODO: not clean
            if proj.metadata['has_mini']:
                proj.write_data()
            else:
                proj._write_metadata()
    finally:
        # Close the temporary upload streams deterministically (also on
        # errors) instead of waiting for garbage collection to do it.
        for storage in files.values():
            storage.close()

    return jsonify(run_info=run_info, project_id=proj.project_id)
예제 #2
0
# Manual test script: creates a "source" and a "ref" normalization project,
# uploads one local csv file to each, then registers both in a new link project.
if __name__ == '__main__':

    # Guard: raises AssertionError immediately, so nothing below runs.
    # NOTE(review): asserts are stripped under `python -O`, in which case the
    # script below WOULD run -- confirm that this is acceptable.
    assert False

    source_file_name = 'source.csv'
    source_user_given_name = 'my_source.csv'
    ref_file_name = 'ref.csv'

    # Create the source project
    proj = ESNormalizer(None, create_new=True)
    source_proj_id = proj.project_id

    # Upload the source file (read from the local_test_data directory)
    file_path = os.path.join('local_test_data', source_file_name)
    with open(file_path, 'rb') as f:
        # Third argument is presumably the user-facing file name -- TODO confirm
        proj.upload_init_data(f, source_file_name, source_user_given_name)

    # Create the ref project
    proj = ESNormalizer(None, create_new=True)
    ref_proj_id = proj.project_id

    # Upload the ref file (read from the local_test_data directory)
    file_path = os.path.join('local_test_data', ref_file_name)
    with open(file_path, 'rb') as f:
        proj.upload_init_data(f, ref_file_name, ref_file_name)

    # Create a link project and register the two projects created above
    proj = ESLinker(create_new=True)

    # NOTE(review): meaning of the second positional argument (False) is not
    # visible from here -- check add_selected_project's signature.
    proj.add_selected_project('source', False, source_proj_id)
    proj.add_selected_project('ref', False, ref_proj_id)