def getDatasetObjectsPrimary(dataset_id):
    """Return the first page (up to 1000 rows) of a dataset's objects.

    Verifies the requesting user may access the dataset (it is public or
    authored by the user), then loads the dataset CSV from S3. When more
    than 1000 rows exist, the remainder is stashed in DatasetCache under a
    fresh cacheId so the client can fetch subsequent pages.

    Returns a Response with "datasetObjects" (plus "cacheId" when more
    pages remain), or a 403 Response when access is denied.
    """
    user = AuthenticationService.verifySessionAndReturnUser(
        request.cookies["SID"])
    # MongoEngine's objects.get raises DoesNotExist rather than returning
    # None, so the original `!= None` comparison could never fail and a
    # miss produced an unhandled exception. Catch it and deny access.
    try:
        Dataset.objects.get(
            Q(id=dataset_id) & (Q(public=True) | Q(author=user)))
    except Dataset.DoesNotExist:
        return Response("You do not have access to that dataset.", status=403)

    filename = dataset_id + ".csv"
    fileFromS3 = s3.get_object(Bucket="agriworks-user-datasets", Key=filename)
    # dtype=str keeps every column as text so values round-trip unchanged.
    dataset = pd.read_csv(fileFromS3["Body"], dtype=str)

    if len(dataset) <= 1000:
        return Response({
            "datasetObjects": DatasetService.buildDatasetObjectsList(dataset)
        })
    # More than one page: cache the remainder keyed by a fresh id.
    cacheId = str(uuid4())
    DatasetCache[cacheId] = dataset[1000:]
    return Response({
        "datasetObjects":
            DatasetService.buildDatasetObjectsList(dataset[:1000]),
        "cacheId": cacheId
    })
def get(self, datasetId):
    """Return the dataset's objects filtered by the datasetFilter query arg.

    datasetFilter is a JSON object mapping column name -> list of accepted
    values; a row is kept only if it matches every listed column.
    """
    try:
        df = DatasetService.getDataset(datasetId)
        # json.loads raises TypeError when the query param is absent
        # (request.args.get returns None), handled below as a 400.
        datasetFilter = json.loads(request.args.get("datasetFilter"))
        for column in datasetFilter:
            df = df[df[column].isin(datasetFilter[column])]
        return Response(
            {"datasetObjects": DatasetService.buildDatasetObjectsList(df)})
    except TypeError:
        return Response("Invalid filter parameters provided.", status=400)
    except s3.exceptions.NoSuchKey:
        return Response("Invalid dataset requested.", status=400)
    except Exception as e:
        # Send a string message; the exception object itself is not a
        # serializable response body.
        return Response(str(e), status=500)
def get(pageNumber):
    """List datasets visible to the user (public or authored), newest first.

    Paging scheme: page "0" returns the first 16 datasets, each later page
    returns 12, and "all" returns every dataset.
    """
    retList = []
    user = AuthenticationService.verifySessionAndReturnUser(
        request.cookies["SID"])
    allDatasets = Dataset.objects.filter(
        Q(public=True) | Q(author=user)).order_by('-dateCreated')
    if pageNumber == "all":
        datasets = allDatasets
    elif pageNumber == "0":
        datasets = allDatasets[:16]
    else:
        try:
            # First page holds 16 entries, every subsequent page 12.
            datasetIndex = 16 + 12 * (int(pageNumber) - 1)
        except ValueError:
            # Non-numeric page parameter previously escaped as a 500.
            return Response("Invalid page number.", status=400)
        datasets = allDatasets[datasetIndex:datasetIndex + 12]
    if len(datasets) == 0:
        return Response("No datasets matching the query were found",
                        status=400)
    for dataset in datasets:
        retList.append(DatasetService.createDatasetInfoObject(dataset))
    return Response(retList)
def getDatasetObjectsSubsequent(cacheId):
    """Return the next page (up to 1000 rows) of a cached dataset.

    Evicts the cache entry once the final page has been served; otherwise
    re-caches the remaining rows and echoes the cacheId back to the client.
    """
    # An unknown or already-evicted cacheId previously raised KeyError
    # (an unhandled 500); report it to the client instead.
    if cacheId not in DatasetCache:
        return Response("Invalid or expired cache id.", status=400)
    dataset = DatasetCache[cacheId]
    if len(dataset) <= 1000:
        # Final page: evict so the id cannot be replayed.
        del DatasetCache[cacheId]
        return Response({
            "datasetObjects": DatasetService.buildDatasetObjectsList(dataset)
        })
    DatasetCache[cacheId] = dataset[1000:]
    return Response({
        "datasetObjects":
            DatasetService.buildDatasetObjectsList(dataset[:1000]),
        "cacheId": cacheId
    })
def post(self):
    """Change a dataset label via DatasetService.changeLabel.

    Returns 200 on success, 400 when the service reports failure or raises.
    """
    try:
        if DatasetService.changeLabel(request):
            # Fixed user-facing typo ("Succesfully").
            return Response("Successfully changed label.", status=200)
        return Response("Error changing label.", status=400)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return Response("Error changing label", status=400)
def getUsersDatasets():
    """List info objects for every dataset authored by the requesting user,
    newest first. An empty result yields an empty list."""
    user = AuthenticationService.verifySessionAndReturnUser(
        request.cookies["SID"])
    datasets = Dataset.objects.filter(author=user).order_by('-dateCreated')
    # A queryset never yields None, so the old per-item `dataset == None`
    # check (returning 400) was dead code and has been removed.
    retList = [DatasetService.createDatasetInfoObject(dataset)
               for dataset in datasets]
    return Response(retList)
def get(self, cacheId):
    """
    Fetch the remaining dataset objects, 1000 or less objects at a time,
    evict cache if all dataset objects have been fetched for this
    session (cacheId).
    """
    # An unknown or already-evicted cacheId previously raised KeyError
    # (an unhandled 500); report it to the client instead.
    if cacheId not in DatasetCache:
        return Response("Invalid or expired cache id.", status=400)
    dataset = DatasetCache[cacheId]
    if len(dataset) <= 1000:
        # Final page: evict so the id cannot be replayed.
        del DatasetCache[cacheId]
        return Response({
            "datasetObjects": DatasetService.buildDatasetObjectsList(dataset)
        })
    DatasetCache[cacheId] = dataset[1000:]
    return Response({
        "datasetObjects":
            DatasetService.buildDatasetObjectsList(dataset[:1000]),
        "cacheId": cacheId
    })
def new():
    """Return info objects for the user's five most recently created
    datasets, newest first."""
    try:
        user = AuthenticationService.verifySessionAndReturnUser(
            request.cookies["SID"])
        # Users datasets by date created, descending, capped at five.
        newDatasets = Dataset.objects(author=user).order_by(
            "-dateCreated")[:5]
        # A queryset never yields None, so the old per-item None check
        # (returning 404) was dead code and has been removed.
        retList = [DatasetService.createDatasetInfoObject(dataset)
                   for dataset in newDatasets]
        return Response(retList)
    except Exception as e:
        print(e)
        return Response("Couldn't retrieve recent datasets", status=400)
def popular():
    """Return info objects for the five most-viewed datasets visible to the
    user (authored by them or public)."""
    try:
        user = AuthenticationService.verifySessionAndReturnUser(
            request.cookies["SID"])
        # Most-viewed first, capped at five.
        datasets = Dataset.objects.filter(
            Q(author=user) | Q(public=True)).order_by("-views")[:5]
        # A queryset never yields None, so the old per-item None check
        # (returning 400) was dead code and has been removed.
        retList = [DatasetService.createDatasetInfoObject(dataset)
                   for dataset in datasets]
        return Response(retList)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return Response("Couldn't retrieve popular datasets", status=400)
def get(self, searchQuery):
    """Full-text dataset search, scoped by the referring page.

    Requests referred from the manage page search only the user's own
    datasets ("type": "user"); requests from the browse page search every
    dataset visible to the user ("type": "all"). Results are ordered by
    MongoDB text-match score.
    """
    datasets = []
    browseURL = "browse"
    manageURL = "manage"
    referrerURL = request.headers["referer"].split('/')[-1]
    user = AuthenticationService.verifySessionAndReturnUser(
        request.cookies["SID"])
    try:
        # The original used a bare `raise` (with no active exception, a
        # RuntimeError) purely to reach the catch-all handler; return the
        # same client-visible response directly instead.
        if searchQuery == "" or searchQuery == " ":
            return Response(
                "Unable to retrieve datasets with the given search parameter.",
                status=400)
        if referrerURL == manageURL:
            # Perform search only on user datasets
            userDatasets = Dataset.objects.filter(author=user)
            matchedDatasets = userDatasets.search_text(
                searchQuery).order_by('$text_score')
            typeUser = True
        elif referrerURL == browseURL:
            # Perform search on all datasets visible to the user
            visibleDatasetsToUser = Dataset.objects.filter(
                Q(author=user) | Q(public=True))
            matchedDatasets = visibleDatasetsToUser.search_text(
                searchQuery).order_by('$text_score')
            typeUser = False
        else:
            # Invalid referrer url.
            return Response(
                "Error processing search request. Please try again later.",
                status=400)
        for dataset in matchedDatasets:
            datasets.append(DatasetService.createDatasetInfoObject(dataset))
        if typeUser:
            return Response({"datasets": datasets, "type": "user"})
        return Response({"datasets": datasets, "type": "all"})
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return Response(
            "Unable to retrieve datasets with the given search parameter.",
            status=400)
def getDataset(datasetId):
    """Return dataset info (with headers) for datasetId, enforcing access.

    Side effects: increments the dataset's view counter and records the
    dataset in the user's recently-viewed list.

    Returns 403 when the dataset is private and not authored by the user,
    400 when no dataset with that id exists.
    """
    user = AuthenticationService.verifySessionAndReturnUser(
        request.cookies["SID"])
    # objects.get raises DoesNotExist rather than returning None, so the
    # original `dataset == None` check was unreachable and a miss produced
    # an unhandled exception; catch it and return the intended 400.
    try:
        dataset = Dataset.objects.get(id=datasetId)
    except Dataset.DoesNotExist:
        return Response(
            "Unable to retrieve dataset information. Please try again later.",
            status=400)
    if not dataset.public and dataset.author != user:
        return Response("You do not have permission to access that dataset.",
                        status=403)
    Dataset.objects(id=datasetId).update_one(inc__views=1)
    AuthenticationService.updateRecentDatasets(request.cookies["SID"],
                                               datasetId)
    return Response(
        DatasetService.createDatasetInfoObject(dataset, withHeaders=True))
def recent():
    """Return info objects for the user's five most recently viewed
    datasets. Ids that no longer resolve to a dataset are skipped."""
    try:
        retList = []
        # Use the session cookie to resolve the requesting user.
        user = AuthenticationService.verifySessionAndReturnUser(
            request.cookies["SID"])
        recentDatasetIds = user.recentDatasets[:5]
        # Retrieve the actual datasets from these ids.
        for datasetId in recentDatasetIds:
            try:
                retList.append(
                    DatasetService.createDatasetInfoObject(
                        Dataset.objects.get(id=datasetId)))
            except Exception:
                # Narrowed from a bare except: skip datasets that have
                # been deleted since they were viewed.
                continue
        return Response(retList)
    except Exception:
        # Removed the unused `as e` binding.
        return Response("Couldn't retrieve recent datasets", status=400)
from flask import Blueprint, request, current_app from Response import Response from mongoengine.queryset.visitor import Q from Models.Dataset import Dataset from Services.DatasetService import DatasetService from Services.AuthenticationService import AuthenticationService from Models.User import User import boto3 import botocore import pandas as pd from uuid import uuid4 DatasetService = DatasetService() AuthenticationService = AuthenticationService() dataset = Blueprint("DatasetEndpoints", __name__, url_prefix="/api/dataset") s3 = current_app.awsSession.client('s3') DatasetCache = {} @dataset.route("/list/<pageNumber>", methods=["GET"]) def get(pageNumber): retList = [] datasets = [] user = AuthenticationService.verifySessionAndReturnUser( request.cookies["SID"]) allDatasets = Dataset.objects.filter(Q(public=True) | Q( author=user)).order_by('-dateCreated')