Beispiel #1
0
        def delete(self, src_id):
            '''
            Deletes specified source.
            This will permanently remove this source from the system. USE CAREFULLY!

            For every matrix listed on the source, the result directory under
            RESPATH and the matching database records are removed on a
            best-effort basis (failures are logged and skipped). The source is
            then handed to utils.delete, removed from the database and finally
            removed from DATALOADER_PATH.

            Returns a (message, status) tuple: 404 when the source is unknown,
            500 when one of the mandatory removal steps fails, 204 on success.
            '''
            client = db_client()
            col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
            try:
                src = find_source(col, src_id)
            except IndexError:
                src = None
            # Other handlers in this file treat a None result from find_source
            # as "not found", so both conventions are accepted here.
            if src is None:
                return 'No resource at that URL.', 404

            try:
                matrices = src['matrices']
            except KeyError:
                logging.info('No Matrices for source %s on delete', src_id)
                matrices = []

            if matrices:
                # NOTE(review): this targets DATALOADER_COL_NAME, while the
                # matrix delete handler cleans results from RESULTS_COL_NAME
                # with the same query -- confirm which collection is intended.
                rescol = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                for mat in matrices:
                    mat_id = mat['id']
                    # The matrix subtree below DATALOADER_PATH/<src_id> goes
                    # away when the whole source directory is removed later.
                    try:
                        logging.info('going to remove %s/%s', RESPATH, mat_id)
                        shutil.rmtree(os.path.join(RESPATH, mat_id))
                        rescol.remove({'src_id':mat_id})
                    except Exception as ex:
                        logging.error('could not remove matrix results %s while deleting source %s exception:%s', mat_id, src_id, ex)

            # uses the dataloader opal deletion function
            try:
                utils.delete(src)
            except Exception as ex:
                err = 'Dataloader opal failed to delete source: %s Exception: %s'%(src,ex)
                logging.error(err)
                return err, 500

            # The request may address the source by name; the database record
            # and the directory on disk are keyed by the stored src_id.
            stored_id = src.get('src_id', src_id)
            try:
                col.remove({'src_id':stored_id})
            except Exception as ex:
                logging.error('could not remove source %s from database exception:%s', stored_id, ex)
                return 'Failed to remove source from database', 500
            try:
                shutil.rmtree(os.path.join(DATALOADER_PATH, stored_id))
            except Exception as ex:
                logging.error('could not remove source %s from disk exception:%s', stored_id, ex)
                return 'Failed to delete source from disk', 500
            return 'Deleted Source: %s'%src_id, 204
Beispiel #2
0
                def get(self, src_id, mat_id):
                    '''
                    Returns features for the specified matrix.
                    Features are the names for the columns within the matrix,
                    read one per line from the features.txt file found at the
                    matrix 'rootdir'. When that file cannot be opened an empty
                    list is returned.

                    Returns a 404 tuple when find_matrix raises IndexError and
                    a 500 tuple when it raises AssertionError.
                    '''
                    client = db_client()
                    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                    try:
                        matrix = find_matrix(col, src_id, mat_id)
                    except IndexError:
                        return 'No resource at that URL.', 404
                    except AssertionError:
                        return 'Bad Mongo Query', 500

                    features_filepath = matrix['rootdir'] + 'features.txt'
                    try:
                        with open(features_filepath) as features_file:
                            # splitlines() ignores a trailing newline but, unlike
                            # split("\n") followed by pop(), keeps the last
                            # feature when the file does not end with one.
                            return features_file.read().splitlines()
                    except IOError:
                        return []
Beispiel #3
0
        def post(self, src_id):
            '''
            Generate a matrix from the source stored at that ID.
            Returns metadata for that matrix.

            The request body is parsed as JSON and passed to utils.ingest
            together with the source record; the matrices it reports are
            appended to the 'matrices' list stored on the source.

            Returns (new matrices, 201) on success, a 404 tuple for an unknown
            source and a 406 tuple when ingestion reports an error or any step
            raises (the body is then the formatted traceback).
            '''
            try:
                posted_data = request.get_json(force=True)
                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)

                try:
                    src = find_source(col, src_id)
                except IndexError:
                    src = None
                # find_source is also relied on elsewhere in this file to
                # return None for a missing source.
                if src is None:
                    return 'No resource at that URL.', 404

                error, matricesNew = utils.ingest(posted_data, src)

                if error:
                    return 'Unable to create matrix.', 406

                # The delete handler in this file tolerates sources without a
                # 'matrices' key, so a first ingest must not fail on it either.
                matrices = list(src.get('matrices', []))
                matrices.extend(matricesNew)
                # Update by the stored id: the request may have addressed the
                # source by name.
                col.update({'src_id':src.get('src_id', src_id)}, { '$set': {'matrices': matrices} })
            except Exception:
                tb = traceback.format_exc()
                return tb, 406
            return matricesNew, 201
Beispiel #4
0
 def get(self):
     '''
     Returns every document stored in the dataloader collection as a list.
     The Mongo "_id" field is left out of each returned document.
     '''
     collection = db_collection(db_client(), DATALOADER_DB_NAME, DATALOADER_COL_NAME)
     hide_object_id = {"_id":0}
     return [doc for doc in collection.find({}, hide_object_id)]
Beispiel #5
0
        def get(self, group_name):
            '''
            Returns a list of sources within a particular group.
            Documents are matched on their 'group_name' field and returned
            without the Mongo "_id" field.
            '''
            collection = db_collection(db_client(), DATALOADER_DB_NAME, DATALOADER_COL_NAME)
            query = {'group_name':group_name}
            cursor = collection.find(query, {"_id":0})
            return list(cursor)
Beispiel #6
0
 def get(self, src_id, param1):
     '''
     Forwards a custom GET request to utils.custom for the given source.

     The source's 'ingest_id' and 'rootdir' are passed along with param1 and
     the request's query arguments; whatever utils.custom returns is returned
     unchanged. A 404 tuple is returned when the source cannot be found.
     '''
     client = db_client()
     col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
     try:
         src = find_source(col, src_id)
     except IndexError:
         src = None
     # find_source is also relied on elsewhere in this file to return None
     # for a missing source, so both conventions are handled.
     if src is None:
         return 'No resource at that URL.', 404
     filepath = src['rootdir']
     return utils.custom(src['ingest_id'], filepath, param1=param1, request=request.args)
Beispiel #7
0
 def post(self, src_id, param1=None, param2=None, param3=None):
     '''
     Forwards a custom POST request to utils.custom for the given source.

     The source's 'ingest_id' and 'rootdir' are passed along with param1,
     param2, param3 and the request's JSON payload; whatever utils.custom
     returns is returned unchanged. A 404 tuple is returned when the source
     cannot be found.
     '''
     client = db_client()
     col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
     try:
         src = find_source(col, src_id)
     except IndexError:
         src = None
     # find_source is also relied on elsewhere in this file to return None
     # for a missing source, so both conventions are handled.
     if src is None:
         return 'No resource at that URL.', 404
     filepath = src['rootdir']
     return utils.custom(src['ingest_id'], filepath, param1=param1, param2=param2, param3=param3, payload=request.get_json())
Beispiel #8
0
    def get(self):
        '''
        Returns a list of available sources.
        Every document of the dataloader collection is returned, with the
        "_id" and "stash" fields left out.
        '''
        hidden_fields = {"_id":0,"stash":0}
        collection = db_collection(db_client(), DATALOADER_DB_NAME, DATALOADER_COL_NAME)
        return list(collection.find({}, hidden_fields))
Beispiel #9
0
        def get(self):
            '''
            Returns a list of groups available.
            The list holds the "_id" of each bucket produced by a $group
            aggregation on "$group_name" over the dataloader collection.
            '''
            collection = db_collection(db_client(), DATALOADER_DB_NAME, DATALOADER_COL_NAME)
            pipeline = [{"$group":{"_id": "$group_name"}}]
            group_names = []
            for bucket in collection.aggregate(pipeline):
                group_names.append(bucket["_id"])
            return group_names
Beispiel #10
0
    def get(self):
        '''
        Returns a list of available ingest modules.
        Only ingest modules that have a document in the ingest collection are
        listed; a module missing from the response is most likely not
        registered in the MongoDB database. The "_id" field is left out of
        each document.
        '''
        collection = db_collection(db_client(), DATALOADER_DB_NAME, INGEST_COL_NAME)
        cursor = collection.find({}, {"_id":0})
        return [module for module in cursor]
Beispiel #11
0
 def get(self):
     '''
     Returns a list of available filters.
     Only filters that have a document in the filters collection are listed;
     a filter missing from the response is most likely not registered in the
     MongoDB database. The "_id" field is left out of each document.
     '''
     no_object_id = {"_id":0}
     collection = db_collection(db_client(), DATALOADER_DB_NAME, FILTERS_COL_NAME)
     return list(collection.find({}, no_object_id))
Beispiel #12
0
            def post(self, src_id):
                '''
                For streaming, start or end the streaming service.
                No payload is sent for this request.

                Returns whatever utils.stream returns for the source's
                'ingest_id' and 'rootdir', or a 404 tuple when the source
                cannot be found.
                '''
                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                try:
                    src = find_source(col, src_id)
                except IndexError:
                    src = None
                # find_source is also relied on elsewhere in this file to
                # return None for a missing source.
                if src is None:
                    return 'No resource at that URL.', 404

                # utils.stream takes no filters, so the filters collection is
                # not queried and no second client is opened.
                filepath = src['rootdir']
                return utils.stream(src['ingest_id'], filepath)
Beispiel #13
0
            def patch(self, src_id):
                '''
                For streaming, toggles streaming on or off.
                This request is used in conjunction with the POST request to this same endpoint.

                Calls utils.update with the source's 'ingest_id' and 'rootdir'
                and returns nothing on success; returns a 404 tuple when the
                source cannot be found.
                '''
                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                try:
                    src = find_source(col, src_id)
                except IndexError:
                    src = None
                # find_source is also relied on elsewhere in this file to
                # return None for a missing source.
                if src is None:
                    return 'No resource at that URL.', 404

                # utils.update takes no filters, so the filters collection is
                # not queried and no second client is opened.
                filepath = src['rootdir']
                utils.update(src['ingest_id'], filepath)
                return
Beispiel #14
0
        def get(self, ingest_id):
            '''
            Returns the ingest module document registered under ingest_id.
            The document is returned without its "_id" field. A 404 tuple is
            returned when no document matches.
            '''
            client = db_client()
            col = db_collection(client, DATALOADER_DB_NAME, INGEST_COL_NAME)

            # find_one reports "no match" by returning None rather than by
            # raising, so the miss has to be detected on the result itself.
            src = col.find_one({'ingest_id':ingest_id},{"_id":0})
            if src is None:
                return 'No resource at that URL', 404
            return src
Beispiel #15
0
    def get(self):
        '''
        Returns a list of all available visualizations.
        Only visualizations that have a document in the visualization
        collection are listed; one missing from the response is most likely
        not registered in the MongoDB database. Each document is returned
        without its '_id' field.
        '''
        collection = db_collection(db_client(), VIS_DB_NAME, VIS_COL_NAME)
        return [
            {field: content for field, content in doc.items() if field != '_id'}
            for doc in collection.find()
        ]
Beispiel #16
0
    def delete(self):
        '''
        Deletes all stored sources.
        This will permanently remove all sources from the system. USE CAREFULLY!

        Every document of the dataloader collection is removed first, then
        every entry found directly under DATALOADER_PATH is deleted from disk.
        Returns an empty body with status 204.
        '''
        client = db_client()
        col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
        #remove the entries in mongo
        col.remove({})
        #remove the actual files
        for entry in os.listdir(DATALOADER_PATH):
            entry_path = os.path.join(DATALOADER_PATH, entry)
            # rmtree only accepts real directories; a stray file or symlink
            # would otherwise abort the cleanup after the database was wiped.
            if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                shutil.rmtree(entry_path)
            else:
                os.remove(entry_path)

        return '', 204
Beispiel #17
0
        def get(self,src_id,matrix_id,output_file,file_download_name):
            '''
            Downloads the specified matrix file.
            Returns the specific file indicated by the user.

            matrix_id is compared against both the 'id' and the 'name' of each
            matrix stored on the source. output_file is served from the
            matching matrix's 'rootdir' as an attachment named
            file_download_name. A 404 tuple is returned when the source or the
            matrix cannot be found.
            '''
            client = db_client()
            col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
            try:
                src = find_source(col, src_id)
            except IndexError:
                src = None
            # find_source is also relied on elsewhere in this file to return
            # None for a missing source.
            if src is None:
                return 'No resource at that URL.', 404

            for matrix in src.get('matrices', []):
                if matrix['id'] == matrix_id or matrix['name'] == matrix_id:
                    return send_from_directory(matrix['rootdir'],output_file, as_attachment=True, attachment_filename=file_download_name)

            # No matrix matched: answer explicitly instead of falling off the
            # end of the handler with no response.
            return 'No resource at that URL.', 404
Beispiel #18
0
        def get(self, src_id):
            '''
            Returns metadata and a list of matrices available for a particular source.

            src_id can be the UUID or name of the source. A 404 tuple is
            returned when the lookup yields None and a 500 tuple when the
            lookup raises.
            '''
            collection = db_collection(db_client(), DATALOADER_DB_NAME, DATALOADER_COL_NAME)
            try:
                source = find_source(collection, src_id)
            except Exception as e:
                return 'Unexpected error %s'%e, 500

            if source is None:
                return 'No resource at that URL', 404
            return source
Beispiel #19
0
            def get(self, src_id, mat_id):
                '''
                Returns metadata for the matrix specified.

                mat_id is compared against both the 'id' and the 'name' of
                each matrix stored on the source; the first match is returned.
                A 404 tuple is returned when the source or the matrix cannot
                be found.
                '''
                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                try:
                    src = find_source(col, src_id)
                except IndexError:
                    src = None
                # find_source is also relied on elsewhere in this file to
                # return None for a missing source.
                if src is None:
                    return 'No resource at that URL.', 404

                # A source without a 'matrices' key simply has no match.
                for matrix in src.get('matrices', []):
                    if matrix['id'] == mat_id or matrix['name'] == mat_id:
                        return matrix

                return 'No resource at that URL.', 404
Beispiel #20
0
            def get(self, src_id):
                '''
                Returns a list of schemas for a particular source.

                The result comes from utils.explore, which receives the
                source's 'ingest_id', the '/source/' path below its 'rootdir'
                and a cursor over the filters collection. A 404 tuple is
                returned when the source lookup yields nothing.
                '''
                client = db_client()
                sources = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)

                source = find_source(sources, src_id)
                if not source:
                    return 'No resource at that URL.', 404

                source_dir = source['rootdir'] + '/source/'

                # every registered filter is handed to the explorer
                filters = db_collection(client, DATALOADER_DB_NAME, FILTERS_COL_NAME).find()

                return utils.explore(source['ingest_id'], source_dir, filters)
Beispiel #21
0
    def post(self):
        '''
        Returns a list of applicable available visualizations.
        Not all visualizations are applicable for every dataset. This request requires a list of inputs and will return the visualizations options available based on those inputs.

        The payload is a JSON list of objects that each carry an 'outputs'
        list. All outputs are pooled; 'selected_features' is added when the
        first object has that key and 'names' is always added. A visualization
        is applicable when it declares at least one input and every declared
        input can be matched against a distinct entry of the pool.

        Returns the applicable visualization documents without '_id', or a
        400 tuple when the payload is missing or empty.
        '''
        data = request.get_json()
        if not data:
            # Without at least one entry there is nothing to match against
            # (and data[0] below would fail).
            return 'Request payload must be a non-empty JSON list.', 400

        client = db_client()
        col = db_collection(client, VIS_DB_NAME, VIS_COL_NAME)

        # Build a fresh pool so the request payload itself is never mutated.
        available = []
        for res in data:
            available.extend(res['outputs'])

        if 'selected_features' in data[0]:
            available.append('selected_features')

        available.append('names')

        vis_options = []
        for vis in col.find():
            required = vis['inputs']
            remaining = available[:]
            for needed in required:
                if needed not in remaining:
                    break
                # Consume the match so a repeated input needs a repeated output.
                remaining.remove(needed)
            else:
                # Reached only when no input was missing; a visualization
                # with no declared inputs is never offered.
                if required:
                    vis_options.append({
                        key: value
                        for key, value in vis.items() if key != '_id'
                    })

        return vis_options
Beispiel #22
0
                def get(self, src_id, mat_id):
                    '''
                    Returns the content of the output.txt document for the specified matrix.

                    The file is read from the matrix 'rootdir'. A 404 tuple is
                    returned when find_matrix raises IndexError or when the
                    document cannot be read, and a 500 tuple when find_matrix
                    raises AssertionError.
                    '''
                    client = db_client()
                    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                    try:
                        matrix = find_matrix(col, src_id, mat_id)
                    except IndexError:
                        return 'No resource at that URL.', 404
                    except AssertionError:
                        return 'Bad mongo query', 500

                    try:
                        output_path = matrix['rootdir'] + 'output.txt'
                        with open(output_path) as output:
                            return output.read()
                    except Exception:
                        # Any failure to locate or read the document is
                        # reported as a missing document; interpreter-level
                        # exits are no longer swallowed.
                        return 'No Output document for %s/%s'%(src_id, mat_id), 404
Beispiel #23
0
            def delete(self, src_id, mat_id):
                '''
                Deletes specified matrix.
                This will permanently remove this matrix and any results generated from it from the system. USE CAREFULLY!

                mat_id is compared against both the 'id' and the 'name' of
                each matrix stored on the source. Matching matrices are
                dropped from the source document and their directories are
                removed from disk; result records and result directories are
                then removed on a best-effort basis.

                Returns ('', 204) once the matrix is deleted, or a 404 tuple
                when the source or the matrix cannot be found.
                '''
                import logging

                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                try:
                    src = find_source(col, src_id)
                except IndexError:
                    src = None
                # find_source is also relied on elsewhere in this file to
                # return None for a missing source.
                if src is None:
                    return 'No resource at that URL.', 404

                matrices_new = []
                removed_ids = []
                for each in src.get('matrices', []):
                    if each['id'] != mat_id and each['name'] != mat_id:
                        matrices_new.append(each)
                    else:
                        removed_ids.append(each['id'])
                if not removed_ids:
                    return 'No resource at that URL.', 404

                # Both ids in the URL may be names; records and directories
                # are keyed by the stored ids, so resolve them first.
                stored_src_id = src.get('src_id', src_id)
                col.update({'src_id':stored_src_id}, { '$set': {'matrices': matrices_new} })

                results = client[DATALOADER_DB_NAME][RESULTS_COL_NAME]
                for removed_id in removed_ids:
                    shutil.rmtree(DATALOADER_PATH + stored_src_id + '/' + removed_id)
                    try:
                        results.remove({'src_id':removed_id})
                        shutil.rmtree(RESPATH + removed_id)
                    except Exception as ex:
                        # A matrix may have no results at all, so this stays
                        # best-effort, but the reason is recorded.
                        logging.info('could not remove results for matrix %s: %s', removed_id, ex)

                # The matrix itself is gone at this point, so the request
                # succeeded whether or not there were results to clean up.
                return '', 204
Beispiel #24
0
        def put(self, name, ingest_id, group_name=""):
            '''
            Saves a new resource with a ID.
            Payload can be either a file or JSON structured configuration data. Returns the metadata for the new source.

            Passing group_name == 'overwrite' replaces an existing source of
            the same name, reusing its src_id; the stored group name is then
            the empty string. Without it, an existing source is returned as
            is, with 'error' and 'msg' fields added.

            Returns (source document, 201) on success, or (traceback text,
            406) when any step of the creation raises.
            '''
            client = db_client()
            col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)

            # If group_name == 'overwrite' then overwrite with same src_id
            overwrite = False
            if group_name == 'overwrite':
                group_name = ""
                overwrite = True

            # Check for an existing source with the same name. Do not overwrite unless specified
            existing_source = find_source(col, name)
            if existing_source is not None and not overwrite:
                logging.warning("Source Already Exists: {}".format(existing_source['src_id']))
                existing_source['error'] = 1
                existing_source['msg'] = "Source Already Exists"
                return existing_source

            try:
                if existing_source:
                    # Only reachable with overwrite set: drop the old record
                    # and its files, then reuse the id.
                    src_id = existing_source['src_id']
                    col.delete_one({"src_id":src_id})
                    file_path = '/'.join([DATALOADER_PATH,src_id])
                    shutil.rmtree(file_path)
                else:
                    src_id = utils.getNewId()

                t = utils.getCurrentTime()
                conn_info = request.get_json()
                filepath = None
                if conn_info is None:
                    upload = request.files['file']
                    # Text after the last dot. A name without a dot yields the
                    # whole name, which is reported as unsupported below
                    # instead of failing the request.
                    ext = upload.filename.rsplit('.', 1)[-1]
                    if ext not in ALLOWED_EXTENSIONS:
                        # Unsupported types are only reported, not rejected.
                        logging.warning("File submitted %s is not of a supported filetype", upload.filename)

                    # Decide on the extension, not on 'zip' appearing anywhere
                    # in the file name.
                    src_type = 'zip' if ext == 'zip' else 'file'

                    _, filepath = write_source_file(DATALOADER_PATH, src_id, upload)

                else:
                    src_type = 'conf'
                    _, filepath = write_source_config(DATALOADER_PATH, src_id, conn_info)

                # The stored root always follows DATALOADER_PATH + src_id + '/',
                # whatever root the writer helpers reported.
                rootpath = DATALOADER_PATH  + src_id + '/'

                source = Source(name, rootpath, src_id, src_type, t, ingest_id, group_name, filepath=filepath)
                # insert_one raises on failure, which the handler below turns
                # into a 406; its result object never compares equal to False.
                source_insert_response = col.insert_one(source.dict())

                response = col.find_one({'_id':source_insert_response.inserted_id},{"_id":0})
            except Exception:
                tb = traceback.format_exc()
                return tb, 406

            return response, 201