Example #1
def get_dataset_keys(request, table):
    """
    Returns JSON containing a list of table keys that have been added for a
    particular dataset

    Parameters:
    table (str) - The uuid of the table being requested

    Returns:
    JsonResponse({'keys': keys}) (str) - A JSON string containing a list of keys
    """

    # Get the session
    session = m.get_session()
    # Query the keys for the requested table
    query = session.query(
        m.DATASET_KEYS).filter(m.DATASET_KEYS.dataset_uuid == table)
    # Read the table keys into a DataFrame
    df = pd.read_sql(query.statement, query.session.bind)
    # Close the session
    session.close()

    # Append and return the keys as JSON
    keys = []
    for index, row in df.iterrows():
        keys.append([row['index_name'], row['dataset_columns']])
    return JsonResponse({'keys': keys})
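
For illustration, a response for a table with two keys might look like the following (the key names and columns are hypothetical):

# Hypothetical response shape:
# {"keys": [["household_key", ["Household_ID"]],
#           ["person_key", ["PERSON_ID", "Last_Name"]]]}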
Example #2
def manage_dataset(request, table):
    """
    Return a page for managing table data

    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """

    # Get a session
    session = m.get_session()
    # Get the name of the file used to create the table being queried
    file_name = str(
        session.query(m.DATASETS.original_filename).filter(
            m.DATASETS.uuid == table).one()[0]
    )  # .one() returns a single-element row (original_filename,);
    # the [0] extracts the filename
    # Get the keys from the table being queried
    keys = session.query(
        m.DATASET_KEYS).filter(m.DATASET_KEYS.dataset_uuid == table).all()
    # Get the user defined joins for the table
    joins = session.query(m.DATASET_JOINS).filter(
        or_(m.DATASET_JOINS.dataset1_uuid == table,
            m.DATASET_JOINS.dataset2_uuid == table)).all()
    # Close the session now that all queries have run
    session.close()

    # Render the data management page
    return render(request, 'manage_dataset.html', {
        'tablename': file_name,
        'table': table,
        'keys': keys,
        'joins': joins
    })
Example #3
def truncate_table(table):
    """
    Truncates the table. Legacy function, but could be useful down the road.

    Parameters:
    table (sqlalchemy class) - the automapped class of the table to truncate
    """

    session = m.get_session()

    max_id_query = session.query(func.max(table.id).label("last_id")).one()[0]
    min_id_query = session.query(func.min(table.id).label("first_id")).one()[0]
    row_count = session.query(func.count(table.id)).one()[0]

    session.execute("DELETE FROM mircs.\"%s\" WHERE id >= 0" % table.__name__) # Delete all rows
    session.execute("ALTER SEQUENCE mircs.\"%s_id_seq\" RESTART WITH 1" % table.__name__) # Reset the id
    session.commit()

    transaction = m.DATASET_TRANSACTIONS(  # Record the truncation as a transaction
        dataset_uuid=table.__name__,
        transaction_type=m.transaction_types[4],
        rows_affected=row_count,
        affected_row_ids=range(min_id_query, max_id_query + 1),  # include the last id
    )

    session.add(transaction)
    session.commit()
    session.close()
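
The raw DELETE and ALTER SEQUENCE strings above interpolate the table name directly. Identifiers cannot be bound parameters, but wrapping the statements in sqlalchemy.text() keeps the raw SQL explicit; a minimal sketch, assuming table.__name__ comes from the trusted automapped class:

from sqlalchemy import text

# Sketch only: the identifier still has to be interpolated, so it must come
# from a trusted source (here, the automapped class name)
session.execute(text('DELETE FROM mircs."%s" WHERE id >= 0' % table.__name__))
session.execute(text('ALTER SEQUENCE mircs."%s_id_seq" RESTART WITH 1' % table.__name__))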
Example #4
def get_pagination_id_range(table, page_number):
    """
    Determine the range of IDs included in a specific page of data. Page n
    covers the ID range n * settings.DATASET_ITEMS_PER_PAGE to
    (n + 1) * settings.DATASET_ITEMS_PER_PAGE, where n is the page_number

    Parameters:
    table (str) - The uuid of the table being requested
    page_number (int) - The requested page number

    Returns:
    id_range (tuple) - The start and end of the range of database IDs included
                       in the requested page
    page_count (int) - The total number of pages available in the dataset:
                       ceil(n / settings.DATASET_ITEMS_PER_PAGE) where n is
                       the total number of rows in the dataset
    """
    # Get a session
    session = m.get_session()

    # Get the object for the table we're working with
    table = getattr(m.Base.classes, table)

    # Figure out how many rows are in the dataset and calculate the number of pages
    dataset_count = session.query(func.count(table.id)).one()[0]
    # Use float division so math.ceil rounds up correctly under Python 2
    page_count = int(math.ceil(float(dataset_count) /
                               settings.DATASET_ITEMS_PER_PAGE))

    # Calculate the id range covered by the current page
    id_range = (int(page_number) * settings.DATASET_ITEMS_PER_PAGE,
                (int(page_number) + 1) * settings.DATASET_ITEMS_PER_PAGE)

    session.close()
    return id_range, page_count
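
As a worked example of the arithmetic, assuming settings.DATASET_ITEMS_PER_PAGE is 100:

# page_number 0 -> id_range == (0, 100); the callers filter with
# "id > 0, id <= 100", i.e. rows 1..100
# a dataset of 250 rows -> page_count == ceil(250 / 100.0) == 3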
Example #5
def view_dataset(request, table):
    """
    Return a page drawing the requested dataset using an html table

    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """
    # Get a session
    session = m.get_session()
    # Get the name of the file used to create the table being queried
    file_name = str(session.query(
        m.DATASETS.original_filename
    ).filter(
        m.DATASETS.uuid == table
    ).one()[0])  # .one() returns a single-element row; the [0] extracts the filename
    session.close()

    db = Session().connection()
    # Get the first 100 rows of data out of the database for the requested dataset
    df = pd.read_sql("SELECT * FROM " + schema + ".\"" + table + "\" LIMIT 100",
                     db, params={'schema': schema, 'table': table})
    columns = df.columns.tolist()
    rows = convert_nans(df.values.tolist())

    # Render the view dataset page
    return render(request, 'view_dataset.html', {
        'dataset': rows,
        'columns': columns,
        'tablename': file_name,
    })
Example #6
def add_resource(table, row_id, resource, real_name=None):
    """
    Adds a resource to a row of a dataset so links and files can be linked to
    dataset rows. When dealing with a file, the file is saved in resources folder
    located in the media folder

    Parameters:
    table (str) - the name of the table
    row_id (int) - the id of the row_id
    resource (mixed) - file to save or str
    real_name (str) - real (original) name of the file

    Return:
    (bool) - True if resource was added
    """
    Resource = m.Base.classes.resources
    session = m.get_session()
    table_orm = getattr(m.Base.classes, table)
    count = len((session.query(table_orm).filter(table_orm.id == row_id)).all())
    if count == 1:
        resource_orm = Resource()
        resource_orm.dataset_uuid = table
        resource_orm.row_id = row_id
        if type(resource) is str:
            resource_orm.location = resource
        elif type(resource) is InMemoryUploadedFile:
            resource_name = os.path.basename(resource.name)
            resource_dir = os.path.join(
                os.path.dirname(__file__),
                settings.MEDIA_ROOT,
                'resources'
            )
            try:
                if not os.path.isdir(resource_dir):
                    os.mkdir(resource_dir)
                ext = os.path.splitext(resource_name)[1]  # includes the leading dot
                file_name = '%s%s' % (uuid.uuid4(), ext)
                full_path = os.path.join(
                    resource_dir,
                    file_name
                )
                # Write the upload to disk in binary mode
                with open(full_path, 'wb') as new_file:
                    new_file.write(resource.read())
                resource_orm.location = full_path
                if real_name is not None:
                    resource_orm.file_name = real_name
                else:
                    resource_orm.file_name = resource_name
            except Exception:
                session.close()
                return False

        session.add(resource_orm)
        session.commit()
        session.close()
        return True
    session.close()
    return False
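
A hypothetical call with a plain string resource; the table uuid and row id below are invented placeholders:

# add_resource('0b1c2d3e4f5a46789abcdef012345678', 17, 'https://example.com/doc.pdf')
# Returns True only when exactly one row with id == 17 exists in the table.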
Example #7
def update_dataset(request, table):
    """
    Update an existing table with an uploaded dataset

    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """
    # If it is POST update the dataset
    if request.method == 'POST':
        df = create_df_from_upload(request)
        key = request.POST.getlist('key')

        orm = getattr(m.Base.classes, table)

        orm_col = orm.__table__.columns
        orm_col = [str(x).split('.')[1] for x in orm_col]
        df_col = [x.replace(' ', '_') for x in df.to_dict().keys()]

        new_cols = [x for x in df_col if x not in orm_col]

        if len(new_cols):
            datatypes = table_generator.get_readable_types_from_dataframe(
                df, return_dict=True)
            datatypes = {
                k: v
                for (k, v) in datatypes.iteritems() if k in new_cols
            }
            table_generator.insert_columns(datatypes, table)

        orm = getattr(m.Base.classes, table)

        successful = table_generator.update_dataset(df, table, key)

        if successful:
            messages.success(request, 'Dataset updated!')
        else:
            messages.error(request, 'Dataset failed to update!')
        return redirect('/manage/' + table)
    else:
        # Upload file form (Used for appending)
        form = Uploadfile()
        # Render the append dataset page
        session = m.get_session()
        # Get all the keys belonging to this dataset
        keys = session.query(
            m.DATASET_KEYS).filter_by(dataset_uuid=table).all()
        session.close()
        keys = [
            {
                'index': x.index_name,
                # Makes strings able to be stored in tags' value attr
                'columns': json.dumps(x.dataset_columns).replace('"', '\'')
            } for x in keys
        ]

        return render(request, 'update_dataset.html', {
            'form': form,
            'table': table,
            'keys': keys
        })
Example #8
def join_datasets(request, table):
    """
    Join two datasets
    """
    # If the method is POST, write the join to the database
    if request.method == "POST":
        # Get the POST data
        post_data = dict(request.POST)

        # Get the sqlalchemy session and create the dataset_join object
        session = m.get_session()
        dataset_join = m.DATASET_JOINS(
            dataset1_uuid=post_data['main_dataset'][0],
            index1_name=post_data['main_key'][0],
            dataset2_uuid=post_data['joining_dataset'][0],
            index2_name=post_data['joining_key'][0]
        )
        # Commit the object to the database
        session.add(dataset_join)
        session.commit()
        session.close()
        # Return to the tables dataset manage page
        return redirect('/manage/'+table)
    else:
        # If the Request is GET, get the datasets
        session = m.get_session()
        tables = session.query(
            m.DATASETS.original_filename,
            m.DATASETS.uuid,
            m.DATASETS.upload_date
        ).all()

        # Get the table's dataset keys
        keys = session.query(
            m.DATASET_KEYS
        ).filter(
            m.DATASET_KEYS.dataset_uuid == table
        ).all()
        session.close()

        # Return to the manage/join page
        context = {'tables': tables, 'main': table, 'keys': keys}
        return render(request, 'join_datasets.html', context)
Example #9
def get_household_members(request, table, person_id):
    """
    Get data on members of the same household

    Parameters:
    table (str) - The uuid of the table being requested
    person_id - the id of the person whose family information is being requested

    Returns:
    JsonResponse (str) - A JSON string containing the data entries related
                         to the person
    """
    # Get a session
    session = m.get_session()

    # Get the object for the table we're working with
    table_id = table
    table = getattr(m.Base.classes, table)

    # Query the table for family information from row with the correct person_id
    query = session.query(
        table.ID_of_Spouse, table.Children_name_ID, table.Mothers_ID,
        table.Fathers_ID,
        table.Siblings_IDs).filter(table.PERSON_ID == person_id)
    # Get a dataframe of the query results
    df = pd.read_sql(query.statement, query.session.bind)

    columnList = df.columns.values.tolist()
    spouse_id = ""
    child_ids = ""
    mom_id = ""
    dad_id = ""
    sib_ids = ""
    # Get family id information from the dataframe
    for row in df.itertuples():
        spouse_id = row[columnList.index('ID_of_Spouse') + 1]
        child_ids = row[columnList.index('Children_name_ID') + 1]
        mom_id = row[columnList.index('Mothers_ID') + 1]
        dad_id = row[columnList.index('Fathers_ID') + 1]
        sib_ids = row[columnList.index('Siblings_IDs') + 1]
    childlist = []
    if child_ids is not None:
        childlist = child_ids.split('; ')
    siblist = []
    if sib_ids is not None:
        siblist = sib_ids.split('; ')
    # Query the table for rows corresponding to this person and their family
    query = session.query(table).filter(
        or_(table.PERSON_ID == person_id, table.PERSON_ID == mom_id,
            table.PERSON_ID == dad_id, table.PERSON_ID == spouse_id,
            table.PERSON_ID.in_(childlist), table.PERSON_ID.in_(siblist)))
    df = pd.read_sql(query.statement, query.session.bind)
    session.close()
    # Return the necessary data
    return JsonResponse({'data': df.to_json(), 'numEntries': len(df.index)})
Example #10
def get_dataset_page(request, table, page_number):
    """"
    Get the data for a specific page of a dataset

    Parameters:
    table (str) - The uuid of the table being requested
    page_number (int) - The page being requested

    Returns:
    JsonResponse (str) - A JSON string containing:
                                * median latitude for the current page
                                * median longitude for the current page
                                * pageCount - total number of pages in dataset
                                * rows - a list of rows of data for the current page
                                * columns - a list of columns in the dataset
    """
    # Determines the id range and number of pages needed to display the table
    id_range, page_count = get_pagination_id_range(table, page_number)

    # Get a session
    session = m.get_session()

    # Get the object for the table we're working with
    table = getattr(m.Base.classes, table)

    # Query the table for rows within the correct range
    query = session.query(
        table
    ).filter(
        table.id > id_range[0],
        table.id <= id_range[1]
    )

    # Get a DataFrame with the results of the query
    df = pd.read_sql(query.statement, query.session.bind)

    # Convert everything to the correct formats for displaying
    columns = df.columns.tolist()
    rows = df.values.tolist()
    rows = convert_nans(rows)
    median_lat = df.LATITUDE.median()
    median_lon = df.LONGITUDE.median()
    session.close()

    return JsonResponse({
        'columns': columns,
        'rows': rows,
        'pageCount': page_count,
        'lat': median_lat,
        'lon': median_lon
    })
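
convert_nans is called throughout this listing but never shown. A minimal sketch of what it plausibly does for the list-of-rows case (an assumption, not the project's actual helper):

import math

def convert_nans(rows):
    # Sketch: recursively replace float NaN with None so it serializes as JSON null
    if isinstance(rows, list):
        return [convert_nans(r) for r in rows]
    if isinstance(rows, float) and math.isnan(rows):
        return None
    return rows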
Example #11
def get_connected_resources(request, id, table):
    """
    view information about connected files
    """
    # Get a session
    session = m.get_session()
    resources = getattr(m.Base.classes, 'resources')
    query = session.query(resources).filter(resources.dataset_uuid == table,
                                            resources.row_id == id)
    df = pd.read_sql(query.statement, query.session.bind)
    file_names = []
    columnList = df.columns.values.tolist()
    for row in df.itertuples():
        # itertuples puts the DataFrame index first, so offset the column position by 1
        file_names.append(row[columnList.index('file_name') + 1])
    session.close()
    return JsonResponse({'file_names': file_names})
Example #12
def add_dataset_key(request, table):
    """
    Add a key to a dataset
    """
    # If POST add the key
    if request.method == 'POST':
        # Get the POST parameter
        post_data = dict(request.POST)

        key_name = post_data['dataset_key_name'][0]

        dataset_columns = post_data['dataset_columns']

        # Get the table
        t = getattr(m.Base.classes, table)
        # Get the column objects for each selected column in the POST parameter
        column_objects = []
        for col in dataset_columns:
            column_objects.append(getattr(t.__table__.columns, col))

        # Build up a standard name for the index
        index_name = key_name

        # Create an sqlalchemy Index object
        index = Index(index_name, *column_objects)
        index.create(m.engine)

        # Create an entry in dataset_keys
        session = m.get_session()
        dataset_key = m.DATASET_KEYS(dataset_uuid=table,
                                     index_name=index_name,
                                     dataset_columns=dataset_columns)
        session.add(dataset_key)
        session.commit()
        session.close()

        # Redirect to the manage_dataset page
        return redirect('/manage/' + table)
    else:
        # Get the columns in the table and add them to the dropdown in the form
        columns = [
            str(x).split('.')[1]
            for x in getattr(m.Base.classes, table).__table__.columns
        ]
        form = AddDatasetKey(zip(columns, columns))
        # Return the form

        return render(request, 'add_dataset_key.html', {'form': form})
Example #13
def get_dataset_columns(request, table):
    """
    Return JSON containing the list of column names for the given table
    """
    # Get a session
    session = m.get_session()
    # Get the object for the table we're working with
    table = getattr(m.Base.classes, table)
    table = session.query(table)
    # Get a DataFrame
    df = pd.read_sql(table.statement, table.session.bind)

    # Convert to a list of strings
    columns = df.columns.tolist()

    session.close()
    return JsonResponse({
        'columns': columns,  # Return the list
    })
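
Reading the whole table into pandas just to recover the column names is heavy. Since the class is automapped, a lighter sketch could take the names from the table metadata instead:

# Sketch of a cheaper alternative: no full-table read required
t = getattr(m.Base.classes, table)
columns = [c.name for c in t.__table__.columns]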
Example #14
def get_dataset_geojson(request, table, page_number):
    """
    Returns geojson created from the geospatial columns of a given page of a table
    """
    # Get the range of database IDs included in the current page of data as well
    # as the total number of pages
    id_range, page_count = get_pagination_id_range(table, page_number)

    # Get a session
    session = m.get_session()

    t = getattr(m.Base.classes, table)

    # Get geospatial columns
    geo = m.GEOSPATIAL_COLUMNS
    geospatial_columns = session.query(geo.column).filter(geo.dataset_uuid == table).all()
    geo_column_objects = []
    geo_column_names = []
    # Create the geospatial object from the columns
    for col in geospatial_columns:
        geo_column_objects.append(geofunc.ST_AsGeoJSON(getattr(t, col[0])))
        geo_column_names.append(col[0])

    # Build up geospatial select functions.
    # Note: we're just grabbing the first geospatial column right now; it is
    # explicitly labeled 'geometry'. A picker for geo columns might be desirable someday.
    geojson = session.query(t, geo_column_objects[0].label('geometry')).filter(
        t.id > id_range[0],
        t.id <= id_range[1]
    )
    # Get a DataFrame with the results of the query
    data = pd.read_sql(geojson.statement, geojson.session.bind)
    geo_column_names.append('geometry')

    # Build some properly formatted geojson to pass into leaflet
    geojson = []
    for i, r in data.iterrows():
        # Geometry and properties are both required for a 'Feature' object.
        geometry = r['geometry']
        properties = r.drop(geo_column_names).to_dict()
        geojson.append({
            'type': 'Feature',
            'properties': properties,
            'geometry': json.loads(geometry),
            'keys': sorted(properties.keys())
        })
    session.close()
    return JsonResponse(geojson, safe=False)
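
Each element appended above is a GeoJSON Feature; an illustrative entry (with invented property names and coordinates) might look like:

# {"type": "Feature",
#  "properties": {"id": 12, "Street": "Main_St"},
#  "geometry": {"type": "Point", "coordinates": [-63.57, 44.65]},
#  "keys": ["Street", "id"]}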
Example #15
def home(request):
    """
    Render a view listing all datasets found in the datasets table in the DB
    """

    # Connect to the session
    session = m.get_session()

    # Create a table map to pass to the html file
    tables = session.query(m.DATASETS.original_filename, m.DATASETS.uuid,
                           m.DATASETS.upload_date).all()
    # Close the session
    session.close()
    # Create a context of the table map to pass to the html file
    context = {'tables': tables}
    # Renders the home page
    return render(request, 'home.html', context)
Example #16
def update_dataset(df, table, key):
    """
    Takes a dataframe of changes to be made to the data and updates the
    matching rows using a given key.

    Parameters:
    df (pandas.DataFrame) - The dataframe with the proper row values
    table (str) - The name of the table that is being updated
    key (list) - The names of the columns that make up the key

    Returns:
    bool - If the update was successful or not
    """
    df = convert_nans(df)
    num_col = len(df.columns)  # Gets the number of columns
    orm = getattr(m.Base.classes, table) # Gets the mapper for the table
    session = m.get_session()
    new_rows = df.copy()[df.index == -1]
    for index, row in df.iterrows():
        ands = [] # List of AND statements

        for i in range(len(key)):
            col = key[i].replace(' ', '_')
            ands.append(getattr(orm, col) == getattr(row, col))

        try:
            for col in row.to_dict():
                if str(row[col]).lower() == 'nat':
                    row[col] = None
            res = session.query(orm).filter(and_(*ands)).update(row.to_dict())
        except Exception:
            session.close()
            return False

        if res == 0:
            # No existing row matched the key; collect it for bulk insert
            list_dict = new_rows.T.to_dict().values()
            list_dict.append(row)
            new_rows = pd.DataFrame(list_dict)
    session.commit()
    session.close()
    geospatial_columns = get_geospatial_columns(table)
    if len(new_rows):
        insert_df(new_rows, orm, geospatial_columns)
    return True
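
A hypothetical call, assuming the table has a PERSON_ID key column; the uuid is an invented placeholder:

# update_dataset(df, '0b1c2d3e4f5a46789abcdef012345678', ['PERSON_ID'])
# Rows whose key matches an existing row are updated in place; rows that
# match nothing (res == 0) are collected into new_rows and bulk-inserted.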
Example #17
def view_dataset(request, table):
    """
    Return a page drawing the requested dataset using an html table

    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """
    # Get a session
    session = m.get_session()
    # Get the name of the file used to create the table being queried
    file_name = str(
        session.query(m.DATASETS.original_filename).filter(
            m.DATASETS.uuid == table).one()[0]
    )  # .one() returns a single-element row; the [0] extracts the filename
    session.close()
    # Render the view dataset page
    return render(request, 'view_dataset.html', {'tablename': file_name})
Example #18
def get_geospatial_columns(table_uuid):
    """
    Get a list of geospatial column definitions from the geospatial_columnns table
    for a given table

    Parameters:
    table_uuid (str) - The uuid of an autogenerated database table

    Retruns:
    columns (list) - A list of geospatial column definitions where each element
                     is of the type returned by parse_geospatial_column_string()
    """
    session = m.get_session()
    res = session.query(m.GEOSPATIAL_COLUMNS.column_definition).filter(
        m.GEOSPATIAL_COLUMNS.dataset_uuid == table_uuid
    )
    columns = []
    for col in res:
        columns.append(parse_geospatial_column_string(col[0]))
    session.close()
    return columns
Example #19
def download_dataset(request, table):
    """
    Download full database table as .csv file
    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """

    # Get a session
    session = m.get_session()
    # Get the name of the file used to create the csv file being returned
    file_name = str(
        session.query(m.DATASETS.original_filename).filter(
            m.DATASETS.uuid == table).one()[0]
    )  # .one() returns a single-element row; the [0] extracts the filename
    session.close()

    db = Session().connection()

    # Create a pandas dataframe from the table
    df = pd.read_sql("SELECT * FROM " + schema + ".\"" + table + "\"", db)

    # content_type tells the browser that the file is csv
    response = HttpResponse(content_type='text/csv')
    # Content-Disposition tells the browser the name of the file to download
    response['Content-Disposition'] = 'attachment; filename=export_%s' % file_name
    # Convert the dataframe to csv
    df.to_csv(response, index=False)

    return response
Example #20
def search_dataset(request, table):
    """
    Render the page that allows the user to search a dataset; on POST, collect
    the submitted query strings and pass them to the results view
    """
    if request.method == 'POST':
        # Get session
        session = m.get_session()
        # Get the object for the table we're working with
        table = getattr(m.Base.classes, table)
        table = session.query(table)
        # Get a DataFrame
        df = pd.read_sql(table.statement, table.session.bind)
        # Get the list of columns
        columns = df.columns.tolist()
        # Close the session
        session.close()

        post_data = dict(request.POST)  # Retrieve the POST data
        queries = []

        # Load the POST data into an array of pairs (column, input string)
        for col in columns:
            # If the column name appears in the POST data, there is an input for it
            if col in post_data:
                queries.append([
                    col, post_data[col + '_query'][0].encode("ascii")
                ])  # Add the pair

        form = Uploadfile()
        #Call the page to view the results of the search and pass the queries
        return render(request, 'view_dataset_query.html', {
            'queries': queries,
            'form': form
        })
    else:
        # Load the search page
        return render(request, 'search_dataset.html', {})
Example #21
def append_dataset(request, table):
    """
    Append dataset to existing table

    Parameters:
    table (str) - the name of the table to be displayed. This should be a UUID
    """
    # If it is POST append the dataset
    if request.method == 'POST':
        # Get the POST data
        post_data = dict(request.POST)
        # Get the datatypes from the posted data
        datatypes = post_data['datatypes'][0].split(',')

        # Figure out the path to the file that was originally uploaded.
        # Use the filepath stored in the session from when the user
        # originally uploaded the file
        absolute_path = os.path.join(
            os.path.dirname(__file__),
            settings.MEDIA_ROOT,
            request.session['temp_filename']
        )
        # Use pandas to read the uploaded file as a CSV
        df = pd.read_csv(absolute_path)
        df = convert_time_columns(df)
        # Replace spaces with underscores in the column names to be used in the db table
        df.columns = [x.replace(" ", "_") for x in df.columns]

        # Get a session
        session = m.get_session()
        # Store the table uuid
        table_uuid = table

        # Get the table model
        table = getattr(m.Base.classes, table)

        # Get the current highest row id in the table
        query = session.query(func.max(table.id).label("last_id"))
        idMax = query.one()

        geospatial_columns = table_generator.get_geospatial_columns(table_uuid)

        # Append to the table with a batch insert
        table_generator.insert_df(df, table, geospatial_columns)

        # Get the new highest row id in the table
        newIdMax = query.one()

        # Create entry in transaction table for append
        transaction = m.DATASET_TRANSACTIONS(
            dataset_uuid=table_uuid,
            transaction_type=m.transaction_types[1],
            rows_affected=len(df.index),
            affected_row_ids=range(idMax[0]+1, newIdMax[0]+1),
        )
        session.add(transaction)
        session.commit()

        # Close the session
        session.close()

        return redirect('/manage/' + table_uuid)
    else:
        # Upload file form (Used for appending)
        form = Uploadfile()
        # Render the append dataset page
        return render(request, 'append_dataset.html', {
            'form': form,
            'table': table
        })
Example #22
def get_joined_dataset(request, table, page_number):
    """
    get joined data for specific page of dataset
    Parameters:
    table (str) - The uuid of the table being requested
    page_number (int) - The page being requested
    Returns:
    JsonResponse (str) - A JSON string containing:
                                                  *uuids of databases joined to this one
                                                  *data from those databases joined to entries on the current page
    """
    # Determines the id range and number of pages needed to display the table
    id_range, page_count = get_pagination_id_range(table, page_number)

    # Get a session
    session = m.get_session()

    # Get the object for the table we're working with
    table_id = table
    table = getattr(m.Base.classes, table)

    # Query the table for rows within the correct range
    query = session.query(table).filter(table.id > id_range[0],
                                        table.id <= id_range[1])

    # Get a DataFrame with the results of the query
    df = pd.read_sql(query.statement, query.session.bind)

    # Query for joins in which the table is the main dataset
    join_query = session.query(
        m.DATASET_JOINS).filter(m.DATASET_JOINS.dataset1_uuid == table_id)
    # Get a dataframe of the join query
    join_df = pd.read_sql(join_query.statement, join_query.session.bind)
    columnList = join_df.columns.values.tolist()
    joined_results = []
    joined_database_ids = []
    # For every dataset joined to this one
    if len(join_df):
        for row in join_df.itertuples():
            i1_name = row[columnList.index('index1_name') + 1]
            d2_id = row[columnList.index('dataset2_uuid') + 1]
            joined_database_ids.append(d2_id)
            curr_db = d2_id
            i2_name = row[columnList.index('index2_name') + 1]
            # Query for the join key from the main table
            d1_key_query = session.query(m.DATASET_KEYS).filter(
                m.DATASET_KEYS.dataset_uuid == table_id,
                m.DATASET_KEYS.index_name == i1_name)
            d1_key_df = pd.read_sql(d1_key_query.statement,
                                    d1_key_query.session.bind)
            for row in d1_key_df.itertuples():
                cols1 = row[3]
            # Query for the join key from the joined table
            d2_key_query = session.query(m.DATASET_KEYS).filter(
                m.DATASET_KEYS.dataset_uuid == d2_id,
                m.DATASET_KEYS.index_name == i2_name)
            d2_key_df = pd.read_sql(d2_key_query.statement,
                                    d2_key_query.session.bind)
            for row in d2_key_df.itertuples():
                cols2 = row[3]
            col_list = df.columns.tolist()
            # For every entry on the dataset page
            for row in df.itertuples():
                matchString = ""
                # Build the matching parameter
                for x in cols1:
                    sql = "SELECT data_type FROM information_schema.columns WHERE table_name = '%s' AND column_name ='%s'" % (
                        d2_id, x)
                    typeSql = m.engine.execute(sql)
                    for k in typeSql:
                        dt = k[0]
                    if dt != 'character varying':
                        matchString = "%s \"%s\"=%s AND" % (matchString, cols2[
                            cols2.index(x)], row[col_list.index(x) + 1])
                    else:
                        matchString = "%s \"%s\"='%s' AND" % (
                            matchString, cols2[cols2.index(x)],
                            row[col_list.index(x) + 1])
                matchString = matchString[:-4]  # Strip the trailing " AND"
                # Retrieve any entry from the joined dataset that corresponds to this entry
                sql_stmt = "SELECT * FROM mircs.\"%s\" WHERE %s" % (
                    d2_id, matchString)
                result = m.engine.execute(sql_stmt)
                # Get the result of the sql query as a dict and append to the final results
                for j in result:
                    rowRes = dict(zip(j.keys(), j))
                    rowRes['dataset'] = curr_db
                    joined_results.append(rowRes)
        return JsonResponse({
            'joined_database_ids': json.dumps(joined_database_ids),
            'main_dataset_key': cols1,
            'joined_dataset_key': cols2,
            'data': json.dumps(joined_results)
        })
    else:
        return JsonResponse({})
Example #23
def create_table(request):
    """
    Submit the primary key / datatype picking page and create a database table
    from the file that was uploaded by the store_file view
    """

    if request.method == 'POST':
        # Get the POST data
        post_data = dict(request.POST)
        # Get the datatypes from the posted data
        datatypes = post_data['datatypes'][0].split(',')

        # Get an sqlalchemy session automap'ed to the database
        session = m.get_session()

        # Generate a UUID to use as the table name, use replace to remove dashes
        table_uuid = str(uuid.uuid4()).replace("-", "")

        # Parse the string returned from the form
        geospatial_string = post_data['geospatial_columns'][0]
        if len(geospatial_string) > 0:
            geospatial_columns = []
            for col in geospatial_string.split(','):
                geospatial_columns.append(
                    table_generator.parse_geospatial_column_string(col))

            for c in geospatial_columns:
                # Add geospatial columns to the session
                geo_col = m.GEOSPATIAL_COLUMNS(
                    dataset_uuid=table_uuid,
                    column=c['name'],
                    column_definition=c['column_definition'])
                session.add(geo_col)

        # Figure out the path to the file that was originally uploaded.
        # Use the filepath stored in the session from when the user
        # originally uploaded the file
        absolute_path = os.path.join(
            os.path.dirname(__file__),
            settings.MEDIA_ROOT,
            request.session['temp_filename']
        )
        # Use pandas to read the uploaded file as a CSV
        df = pd.read_csv(absolute_path)
        df = convert_time_columns(df)
        # Replace spaces with underscores in the column names to be used in the db table
        df.columns = [x.replace(" ", "_") for x in df.columns]

        # Create a new dataset to be added
        dataset = m.DATASETS(
            uuid=table_uuid,
            original_filename=request.session['real_filename'],
            upload_date=datetime.datetime.now(),
        )
        # create a new transaction to be added
        ids = [int(i) for i in (df.index + 1).tolist()]

        # Create a transaction to add to transaction table
        transaction = m.DATASET_TRANSACTIONS(
            dataset_uuid=table_uuid,
            transaction_type=m.transaction_types[0],
            rows_affected=len(ids),
            affected_row_ids=ids,
        )

        # Add the dataset and transaction to the session and commit the session
        # to the database
        session.add(dataset)
        session.add(transaction)
        session.commit()

        # Generate a database table based on the data found in the CSV file
        if len(geospatial_string) > 0:
            table_generator.to_sql(df, datatypes, table_uuid, schema,
                                   geospatial_columns)
        else:
            table_generator.to_sql(df, datatypes, table_uuid, schema)

        session.close()
        return redirect('/')
    else:
        return None
Example #24
def get_dataset_query(request, table, queries):
    """
    Uses input data to build a SQL alchemy query and returns the resulting data
    """
    # Get the list of queries
    queries = queries.split("/")[:-1]
    # This list is in the format [col1, query for col1, col2, query for col2, etc...]

    # Get a session
    session = m.get_session()

    # Get the object for the table we're working with
    table = getattr(m.Base.classes, table)

    # Build the query command
    i = 0
    search = "query = session.query(table).filter("
    valid = True
    while i < len(queries) and valid:
        if i != 0:  # Add commas between filters
            search += ", "
        col_type = str(getattr(table, queries[i]).type)
        if col_type == 'INTEGER':  # Query for integer columns
            # If the input string is a valid integer, add the filter
            if isInt(queries[i + 1]):
                search += ("getattr(table, queries[" + str(i) +
                           "]) == int(queries[" + str(i + 1) + "])")
            else:  # If the input is not valid, the query cannot return any results
                valid = False
        elif col_type == 'DOUBLE PRECISION':  # Query for decimal columns
            # If the input string is a valid float, add the filter
            if isFloat(queries[i + 1]):
                search += ("getattr(table, queries[" + str(i) +
                           "]) == float(queries[" + str(i + 1) + "])")
            else:  # If the input is not valid, the query cannot return any results
                valid = False
        else:  # Query for string columns
            search += ("getattr(table, queries[" + str(i) +
                       "]).ilike(\"%\" + queries[" + str(i + 1) + "] + \"%\")")
        i += 2  # Items are accessed from the list in pairs [col1, query1, col2, query2, etc...]

    if not valid:  #If the query had invalid input, return with a false value
        session.close()
        return JsonResponse({'valid': valid})

    search += ")"
    # Execute the built statement; this defines `query` in the local scope
    exec(search)

    # Get a DataFrame with the results of the query
    df = pd.read_sql(query.statement, query.session.bind)

    # Convert everything to the correct formats for displaying
    columns = df.columns.tolist()
    rows = table_generator.convert_nans(df.values.tolist())

    session.close()
    return JsonResponse({'columns': columns, 'rows': rows, 'valid': valid})
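
isInt and isFloat are used above but not defined in this listing; minimal sketches of the assumed behavior:

def isInt(s):
    # Sketch: True if the string parses as an integer
    try:
        int(s)
        return True
    except ValueError:
        return False

def isFloat(s):
    # Sketch: True if the string parses as a float
    try:
        float(s)
        return True
    except ValueError:
        return False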