Example #1
def upload_file(request):
    if request.method == 'POST':
        print(request.FILES)
        all_files = request.FILES.getlist('files[]')
        if len(all_files) > 0:
            try:
                uploaded_file = all_files[0]
                file_name = uploaded_file.name

                filename, file_extension = os.path.splitext(file_name)

                if file_extension not in [".csv", ".h5", ".xlsx"]:
                    return JsonResponse({
                        "status": "failure",
                        "message": "Unsupported file format"
                    })

                # Make sure the media directory exists before saving the file
                if not os.path.exists(settings.MEDIA_ROOT):
                    os.makedirs(settings.MEDIA_ROOT)

                file_name = "{}_{}".format(time.time(), file_name)
                file_path = os.path.join(settings.MEDIA_ROOT, file_name)
                # Write the upload chunk by chunk so large files are not held in memory at once
                with open(file_path, 'wb+') as fout:
                    for chunk in uploaded_file.chunks():
                        fout.write(chunk)

                # Save this file to database
                dataset = Dataset(path=file_name)
                dataset.save()

                return JsonResponse({
                    'status': 'success',
                    'message': 'Data uploaded successfully',
                    'file_path': file_name,
                    "dataset_id": dataset.id
                })
            except Exception as e:
                return JsonResponse({
                    'status': 'failure',
                    'message': 'Error: upload failed: {}'.format(e)
                })
        else:
            return JsonResponse({
                "status": "failure",
                "message": "No file found"
            })
    else:
        print(request)
        return JsonResponse({
            'status': 'failure',
            'message': 'Invalid request'
        })
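A minimal alternative sketch for the file-saving step, assuming the same Django setup (the helper name save_upload is hypothetical): django.core.files.storage.default_storage handles chunked writes and name collisions itself, so the manual chunk loop above could be replaced with something like:

import time
from django.core.files.storage import default_storage

def save_upload(uploaded_file):
    # Stores the file under MEDIA_ROOT (the default FileSystemStorage) and
    # returns the name it was actually saved under
    stored_name = "{}_{}".format(time.time(), uploaded_file.name)
    return default_storage.save(stored_name, uploaded_file)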
Example #2
def make_dataset(user_id, file_id, filter_id):
    """
    Function form DataSet from File with given id by applying given filter.
    After filtration DataSet is added to DB
    :param user_id: id of User which currently works with this data
    :param file_id: id of a File from which should we get DataSet
    :param filter_id: id of Filter which we apply to File and total amount of rows for DataSet
    :return: id of created DataSet
    """
    # Get the filtering params from the DB
    filters = Filter.query.get(filter_id).params
    # Get the actual DataFrame, with earlier DataSets formed from the same File dropped
    dataframe = dataframe_actualization(file_id, user_id)
    # Apply the filter; the result is a list of included row indexes
    result = filter_apply(dataframe, filters)
    # Convert numpy int64 to plain int so the array can be stored in the DB
    result = [int(x) for x in result]

    new_dataframe = Dataset(file_id=file_id,
                            user_id=user_id,
                            included_rows=result,
                            filter_id=filter_id)
    db.session.add(new_dataframe)
    db.session.commit()
    return new_dataframe.id
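dataframe_actualization and filter_apply are helpers assumed by this snippet and not shown here. A minimal, hypothetical sketch of what filter_apply might look like, assuming a pandas DataFrame and a params dict of column/value equality filters:

import pandas as pd

def filter_apply(dataframe, filters):
    # Keep only the rows matching every column == value pair in `filters`
    mask = pd.Series(True, index=dataframe.index)
    for column, value in filters.items():
        mask &= dataframe[column] == value
    # Return the included row indexes, as the caller above expects
    return dataframe.index[mask].tolist()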
Example #3
    def test_one_article_no_compounds_dataset_model(self):
        # Create article without compound
        art = Article(pmid=12345,
                      journal='Test Journal',
                      year=2018,
                      volume='12',
                      issue='12',
                      pages='1-1000',
                      authors='Douglas Adams',
                      doi='10.1234/HHGTTG',
                      title='HHGTTG',
                      abstract='Test Abstract',
                      num_compounds=0)

        # Get curator
        curator = Curator.query.filter_by(username='******').first()
        self.assertEqual(curator.username, 'test_user')

        # Create empty dataset and add
        ds = Dataset(curator=curator, articles=[art])
        db.session.add(ds)
        db.session.commit()

        # Tests
        self.assertEqual(Dataset.query.count(), 1)
        self.assertEqual(Dataset.query.first().curator.username, 'test_user')
        self.assertEqual(len(Dataset.query.first().articles), 1)
        self.assertEqual(Dataset.query.first().articles[0].pmid, 12345)
Example #4
def add_dataset():
    tmp_dir = os.path.join(current_app.instance_path,
                           current_app.config["TEMP_DIR"])
    dataset_dir = os.path.join(current_app.instance_path,
                               current_app.config["DATASET_DIR"])
    form = AdminAddDatasetForm()
    if form.validate_on_submit():
        temp_filename = os.path.join(tmp_dir,
                                     secure_filename(form.file_.data.filename))
        if not os.path.exists(temp_filename):
            flash("Internal error: temporary dataset disappeared.", "error")
            return redirect(url_for("admin.add_dataset"))
        name = get_name_from_dataset(temp_filename)
        target_filename = os.path.join(dataset_dir, name + ".json")
        if os.path.exists(target_filename):
            flash("Internal error: file already exists!", "error")
            return redirect(url_for("admin.add_dataset"))
        os.rename(temp_filename, target_filename)
        if not os.path.exists(target_filename):
            flash("Internal error: file moving failed", "error")
            return redirect(url_for("admin.add_dataset"))
        is_demo = dataset_is_demo(target_filename)
        dataset = Dataset(name=name,
                          md5sum=md5sum(target_filename),
                          is_demo=is_demo)
        db.session.add(dataset)
        db.session.commit()
        flash("Dataset %r added successfully." % name, "success")
        return redirect(url_for("admin.add_dataset"))
    return render_template("admin/add.html", title="Add Dataset", form=form)
Example #5
    def insert_sample_datasets(self):

        with open(self.datasets_file, 'r') as dataset_file:
            reader = csv.reader(dataset_file)
            next(reader)

            for row in reader:
                dataset = Dataset(dataset_id=row[0],
                                  annex_uuid=row[1],
                                  description=row[2],
                                  owner_id=row[3],
                                  download_path=row[4],
                                  raw_data_url=row[5],
                                  name=row[6],
                                  modality=row[7],
                                  version=row[8],
                                  format=row[9],
                                  category=row[10],
                                  image=self.read_image(row[11]),
                                  date_created=datetime.now(),
                                  date_updated=datetime.now(),
                                  is_private=row[12] == 'True')
                db.session.add(dataset)
        db.session.commit()
        # no explicit close needed; the with block above already closed the file
Example #6
    def test_one_article_one_compound_dataset_model(self):
        # Create compound
        cmpd = Compound(name='Methane',
                        smiles='C',
                        source_organism='Saccharomyces cerevisiae')
        # Create article with compound
        art = Article(pmid=12345,
                      journal='Test Journal',
                      year=2018,
                      volume='12',
                      issue='12',
                      pages='1-1000',
                      authors='Douglas Adams',
                      doi='10.1234/HHGTTG',
                      title='HHGTTG',
                      abstract='Test Abstract',
                      num_compounds=1,
                      compounds=[cmpd])

        # Get curator
        curator = Curator.query.filter_by(username='******').first()
        self.assertEqual(curator.username, 'test_user')

        # Create empty dataset and add
        ds = Dataset(curator=curator, articles=[art])
        db.session.add(ds)
        db.session.commit()

        # Tests
        self.assertEqual(Dataset.query.count(), 1)
        self.assertEqual(len(Dataset.query.first().articles), 1)
        self.assertEqual(Dataset.query.first().articles[0].pmid, 12345)
        self.assertEqual(len(Dataset.query.first().articles[0].compounds), 1)
        self.assertEqual(Dataset.query.first().articles[0].compounds[0].name,
                         'Methane')
Example #7
def create_dataset():
    name = request.form['name']
    area = request.form['area']
    source = request.form['source']
    file_type = request.form['file_type']
    link = request.form['link']
    date_obtained = request.form['date_obtained']
    tags = request.form['tags']
    file = request.files['file']

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        filepath = os.path.join(datasets.config['UPLOAD_FOLDER'], filename)

        if os.path.exists(filepath):
            new_filename = '{}_{}.{}'.format(
                filename.rsplit('.', 1)[0], datetime.now().strftime('%H%M'),
                filename.rsplit('.', 1)[1])
            new_filepath = os.path.join(datasets.config['UPLOAD_FOLDER'],
                                        new_filename)
            file.save(
                os.path.join(datasets.config['UPLOAD_FOLDER'], new_filename))
            new_dataset = Dataset(name=name,
                                  area=area,
                                  source=source,
                                  file_type=file_type,
                                  link=link,
                                  date_obtained=date_obtained,
                                  tags=tags,
                                  file=new_filepath)
        else:
            file.save(os.path.join(datasets.config['UPLOAD_FOLDER'], filename))
            new_dataset = Dataset(name=name,
                                  area=area,
                                  source=source,
                                  file_type=file_type,
                                  link=link,
                                  date_obtained=date_obtained,
                                  tags=tags,
                                  file=filepath)

        db.session.add(new_dataset)
        db.session.commit()

        return jsonify({'status': 'success'}), 200

    # Without a valid file, new_dataset is never created, so report a failure instead
    return jsonify({'status': 'failure', 'message': 'missing or unsupported file'}), 400
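allowed_file is not shown in this snippet; a common sketch of it, assuming a hypothetical ALLOWED_EXTENSIONS whitelist:

ALLOWED_EXTENSIONS = {'csv', 'json', 'xlsx'}  # assumed set of permitted extensions

def allowed_file(filename):
    # Accept only filenames whose extension is in the whitelist
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS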
Example #8
    def upload_file(self, file):
        """
        Accepts FileStorage object
        Saves original uploaded file and serialized DataFrame for this file.
        Creates records in database
        :param file: FileStorage
        :return: IDs of created data set and file
        """
        # Get check sum of file
        hashed_file_name = f'{self.get_file_checksum(file)}'

        # Change file name and save it under this name
        file_extension = self.get_file_extension(file.filename)
        file_full_name = f'{hashed_file_name}.{file_extension}'
        file_path = os.path.join(self.files_dir, file_full_name)

        # Send existing file instead of uploading a new one
        if file_full_name in self.files:
            _file = File.query.filter(File.path == file_full_name).first()
            _dataset = Dataset.query.filter(
                Dataset.file_id == _file.id).first()
            logger.info(
                'User {0} uploaded a file which already existed under id {1}'.format(
                    self.user_id, _file.id))
            return 'Uploaded', _file.id, _dataset.id

        file.seek(0)
        file.save(file_path)

        # Serialize uploaded file as DataFrame (Update when DataFrame interface is ready)
        shape = self.serialize(file_full_name)

        # Get attributes of file
        file_attributes = self.get_attributes(file_path)
        file_attributes['name'] = file.filename
        file_attributes['rows'] = shape[0]
        file_attributes['cols'] = shape[1]

        # Save to db, update when dbm is ready
        new_file = File(path=file_full_name, attributes=file_attributes)
        db.session.add(new_file)
        db.session.flush()
        new_dataset = Dataset(user_id=self.user_id, file_id=new_file.id)
        db.session.add(new_dataset)
        db.session.commit()
        logger.info('User {0} uploaded a new file {1}'.format(
            self.user_id, new_file.id))
        response = {
            'file': {
                'id': new_file.id,
                'name': new_file.attributes['name'],
                'size': new_file.attributes['size'],
                'rows': new_file.attributes['rows']
            },
            'dataset_id': new_dataset.id
        }
        return response
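get_file_checksum and get_file_extension are helper methods assumed by this class and not shown; plausible sketches (written here as plain functions, names and signatures assumed):

import hashlib
import os

def get_file_checksum(file_storage, chunk_size=8192):
    # Hash the uploaded stream in chunks; the caller seeks back to 0 afterwards, as above
    digest = hashlib.md5()
    for chunk in iter(lambda: file_storage.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()

def get_file_extension(filename):
    # 'data.csv' -> 'csv'
    return os.path.splitext(filename)[1].lstrip('.').lower()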
Example #9
def register_face():
    form = RegistrationForm()
    if form.validate_on_submit():
        if form.images.data:
            print(
                "##############################################################\n"
            )
            print(form.images.data)
            images = []
            # random_hex = secrets.token_hex(8)
            name = form.name.data
            person = Person(name=name)
            db.session.add(person)
            db.session.commit()
            id = Person.query.filter_by(name=name).first().id
            has_at_least_one_image_with_single_face = False
            for image in form.images.data:
                # TODO: ensure there is exactly one face in the image (if two people appear in
                # the same image, the second person can be misrecognized as the first, because
                # the photo ends up in the first person's id folder, which is scanned first)
                face_image = faces.hasSingleFace(image)
                if face_image is not None:
                    has_at_least_one_image_with_single_face = True
                    image_fn, image_path = save_image_to_dataset(
                        dir_name=str(id),
                        form_image_name=image.filename,
                        to_be_saved_image=face_image)
                    dataset = Dataset(image_file=image_fn, author=person)
                    db.session.add(dataset)
                    print(image_path)
                    images.append(image_fn)
            if has_at_least_one_image_with_single_face is True:
                db.session.commit()
                faces.make_new_face_encodings()
                flash(
                    f'Congratulations! Successfully registered the face as {form.name.data}. Try recognizing {form.name.data}.',
                    'success')
                return redirect(
                    url_for('recognize_faces', title='Recognize Faces'))
            else:
                flash(
                    f'{form.name.data} not registered as there was no face in the image. Try providing different images.',
                    'danger')
                return render_template('register_face.html',
                                       title="Register Face",
                                       selectedListElement="registerFace",
                                       form=form)
    return render_template('register_face.html',
                           title="Register Face",
                           selectedListElement="registerFace",
                           form=form)
Example #10
def _seed_test_datasets_db(app):
    """
    Seeds a set of test datasets populated from a static csv file
    """
    from app import db
    from app.models import User, Dataset, DatasetStats

    dataset_csvfile = os.path.join(app.root_path, "../test/datasets.csv")
    with open(dataset_csvfile, 'r') as data_csv:
        csv_reader = csv.DictReader(data_csv)
        for row in csv_reader:
            dataset = Dataset(
                dataset_id=row['dataset_id'],
                annex_uuid=row['annex_uuid'],
                description=row['description'],
                owner_id=row['owner_id'],
                download_path=row['download_path'],
                raw_data_url=row['raw_data_url'],
                name=row['name'],
                modality=row['modality'],
                version=row['version'],
                format=row['format'],
                category=row['category'],
                date_created=datetime.utcnow(),
                date_updated=datetime.utcnow(),
                is_private=row['is_private'] == 'True'
            )

            db.session.add(dataset)

        dataset_stats_csvfile = os.path.join(
            app.root_path, "../test/datasets_stats.csv")
        with open(dataset_stats_csvfile, 'r') as datastat_csv:
            csv_reader = csv.DictReader(datastat_csv)
            for row in csv_reader:
                dataset_stat = DatasetStats(
                    dataset_id=row['dataset_id'],
                    size=row['size'],
                    files=row['files'],
                    sources=row['sources'],
                    num_subjects=row['num_subjects'],
                    num_downloads=row['num_downloads'],
                    num_likes=row['num_likes'],
                    num_views=row['num_views'],
                    date_updated=datetime.utcnow()
                )
                db.session.add(dataset_stat)

        db.session.commit()
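The seeder above assumes a test/datasets.csv whose header row matches the DictReader keys it reads; an illustrative header (column order hypothetical) would be:

dataset_id,annex_uuid,description,owner_id,download_path,raw_data_url,name,modality,version,format,category,is_private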
Example #11
def new_dataset():
    """
    Creates a new mock dataset to test
    """
    dataset = Dataset(dataset_id="8de99b0e-5f94-11e9-9e05-52545e9add8e",
                      description="Human Brain phantom scans, Multiple MRI"
                      " scans of a single human phantom over 11"
                      " years, T1 weighted images and others on"
                      " 13 scanner in 6 sites accross North America."
                      " The data are available in minc format",
                      name="Multicenter Single Subject Human MRI Phantom",
                      version="1.0",
                      is_private=False,
                      fspath='./test/test_dataset')
    return dataset
Example #12
    def test_empty_dataset_model(self):
        """
        Create dataset for test_user with no data
        """
        # Get curator
        curator = Curator.query.filter_by(username='******').first()
        self.assertEqual(curator.username, 'test_user')

        # Create empty dataset and add
        ds = Dataset(curator=curator)
        db.session.add(ds)
        db.session.commit()

        # Tests
        self.assertEqual(Dataset.query.count(), 1)
        self.assertEqual(Dataset.query.first().curator_id, 2)
        self.assertEqual(len(Dataset.query.first().articles), 0)
Example #13
    def seed_test_db():
        from app import db
        from app.models import User, Dataset, DatasetStats

        # create an admin user (Not useful now, but at least we will have a user)

        # import the current dataset information (to be replaced by a dynamic process)
        dataset_csvfile = os.path.join(app.root_path, "../test/datasets.csv")
        with open(dataset_csvfile, 'r') as data_csv:
            csv_reader = csv.DictReader(data_csv)
            for row in csv_reader:
                dataset = Dataset(dataset_id=row['dataset_id'],
                                  annex_uuid=row['annex_uuid'],
                                  description=row['description'],
                                  owner_id=row['owner_id'],
                                  download_path=row['download_path'],
                                  raw_data_url=row['raw_data_url'],
                                  name=row['name'],
                                  modality=row['modality'],
                                  version=row['version'],
                                  format=row['format'],
                                  category=row['category'],
                                  date_created=datetime.utcnow(),
                                  date_updated=datetime.utcnow(),
                                  is_private=row['is_private'] == 'True')

                db.session.add(dataset)

            dataset_stats_csvfile = os.path.join(app.root_path,
                                                 "../test/datasets_stats.csv")
            with open(dataset_stats_csvfile, 'r') as datastat_csv:
                csv_reader = csv.DictReader(datastat_csv)
                for row in csv_reader:
                    dataset_stat = DatasetStats(
                        dataset_id=row['dataset_id'],
                        size=row['size'],
                        files=row['files'],
                        sources=row['sources'],
                        num_subjects=row['num_subjects'],
                        num_downloads=row['num_downloads'],
                        num_likes=row['num_likes'],
                        num_views=row['num_views'],
                        date_updated=datetime.utcnow())
                    db.session.add(dataset_stat)

            db.session.commit()
Example #14
def dataset():
    form = DatasetForm()
    form.datasets_choices()

    if request.method == 'POST' and form.validate_on_submit():
        logger.info(f'dataset: {form.dataset.data}, split: {form.split.data}, '
                    f'kfolds: {form.kfolds.data}')

        # parameters used when the dataset was created.
        details = form.dataset.data.split('_')
        dataset = Dataset(file=form.dataset.data,
                          window=details[1].split('w')[-1],
                          aggregation=details[2].split('t')[-1],
                          size=details[3].split('.csv')[0].split('s')[-1],
                          split=form.split.data,
                          kfolds=form.kfolds.data)
        db.session.add(dataset)
        db.session.commit()

        return redirect(url_for('setting.classifier'))
    return render_template('setting/dataset.html', form=form)
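The split-based parsing above assumes a dataset filename convention along the lines of <name>_w<window>_t<aggregation>_s<size>.csv (the exact prefix below is hypothetical); a quick sketch of how the fields decompose:

filename = 'activity_w10_t60_s500.csv'             # hypothetical example
details = filename.split('_')                      # ['activity', 'w10', 't60', 's500.csv']
window = details[1].split('w')[-1]                 # '10'
aggregation = details[2].split('t')[-1]            # '60'
size = details[3].split('.csv')[0].split('s')[-1]  # '500'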
Example #15
def search_dataset():
    qstring = request.args.get('query')
    results, total = Dataset.search(qstring)

    output = {}
    output["total"] = total
    output["results"] = []
    for result in results:
        values = {
            'name': result.name,
            'area': result.area,
            'source': result.source,
            'file_type': result.file_type,
            'link': result.link,
            'date_obtained': result.date_obtained,
            'tags': result.tags,
            'id': result.id,
        }
        output["results"].append(values)

    return jsonify(output), 200
Example #16
    def insert_sample_datasets(self):

        with open(self.datasets_file, 'r') as dataset_file:
            reader = csv.reader(dataset_file)
            next(reader)

            for row in reader:
                dataset = Dataset(id=row[0],
                                  dataset_id=row[1],
                                  owner_id=row[2],
                                  name=row[3],
                                  modality=row[4],
                                  version=row[5],
                                  format=row[6],
                                  category=row[7],
                                  date_created=datetime.now(),
                                  date_updated=datetime.now(),
                                  is_private=row[8] == 'True')
                db.session.add(dataset)
        db.session.commit()
        # no explicit close needed; the with block above already closed the file
Example #17
def new_dataset():
    """
    Creates a new mock dataset to test
    """
    dataset = Dataset(dataset_id="8de99b0e-5f94-11e9-9e05-52545e9add8e",
                      annex_uuid="4fd032a1-220a-404e-95ac-ccaa3f7efcb7",
                      description="Human Brain phantom scans, Multiple MRI"
                      " scans of a single human phantom over 11"
                      " years, T1 weighted images and others on"
                      " 13 scanner in 6 sites accross North America."
                      " The data are available in minc format",
                      owner_id=1,
                      download_path="multicenter-phantom",
                      raw_data_url="https://phantom-dev.loris.ca",
                      name="Multicenter Single Subject Human MRI Phantom",
                      modality="Imaging",
                      version="1.0",
                      format="minc",
                      category="Phantom",
                      is_private=False)
    return dataset
Example #18
    def push_docker_img(self, dir_name, current_user_id, name, report):
        client = docker.from_env()
        current_user_obj = User.query.get(current_user_id)
        image_name = current_user_obj.username + '-' + name
        repo_name = os.environ.get('DOCKER_REPO') + '/'
        print(client.images.push(repository=repo_name + image_name), file=sys.stderr)

        ########## UPDATING DB ######################################################################

        # add dataset to database
        new_dataset = Dataset(url="https://hub.docker.com/raas/" + repo_name + image_name + "/",
                              author=current_user_obj,
                              name=name,
                              report=report)
        db.session.add(new_dataset)
        db.session.commit()

        ########## CLEANING UP ######################################################################

        self.clean_up_datasets(dir_name)
        print("Returning")
Example #19
def new_dataset():

    if request.method == 'POST':
        print(request.form)
        
        #if request.form['dataset_uploaded']:
        #    flash(f'Your dataset has been created!', 'success')
        #    return redirect(url_for('datasets.new_dataset'))
        
        dataset = Dataset(title=request.form['title'], abstract=request.form['abstract'], category=request.form['category'])
        db.session.add(dataset)
        db.session.commit()
    
        return json.dumps({"dataset_id": dataset.id})

    form = DatasetForm()
    if form.validate_on_submit():
        print(f'datasets data: {form.datasets}')
        # for files in form.datasets.getlist():
        #     print(filles)

    return render_template('create_dataset.html', title='New Dataset', form=form, legend='New Dataset')
Example #20
def run_command():
    atlas.dbInit("mysql+pymysql", "jvansan", "", "127.0.0.1",
                 "np_atlas_2018_07")
    sess = atlas.startSession()
    curator = Curator.query.filter_by(username="******").first()
    articles = get_first_N_article_with_compounds(sess, 100)
    ds_articles = []
    for art_compound in articles:
        a = art_compound[0]
        compounds = []
        for c in art_compound[1]:
            org = get_compound_origin(c, sess)
            org_string = org.genus.name + " " + org.species
            compounds.append(
                Compound(name=c.names[0].name,
                         smiles=c.smiles,
                         source_organism=org_string))
            # Make sure to sort the names of the compounds before insertion
            compounds.sort(key=lambda x: x.name)
        art = Article(pmid=a.pmid,
                      journal=a.journal.title,
                      year=a.year,
                      volume=a.volume,
                      issue=a.issue,
                      pages=a.pages,
                      authors=a.authors,
                      doi=a.doi,
                      title=a.title,
                      abstract=a.abstract,
                      num_compounds=len(compounds),
                      compounds=compounds)

        ds_articles.append(art)

    ds = Dataset(curator=curator, articles=ds_articles)
    db.session.add(ds)
    db.session.commit()
Example #21
def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db
    from app.models import ArkId
    from app.models import Dataset as DBDataset
    from app.models import DatasetAncestry as DBDatasetAncestry
    from sqlalchemy import exc
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json
    from pathlib import Path
    import git

    datasetsdir = Path(app.config['DATA_PATH']) / 'conp-dataset'
    datasetsdir.mkdir(parents=True, exist_ok=True)

    # Initialize the git repository object
    try:
        repo = git.Repo(datasetsdir)
    except git.exc.InvalidGitRepositoryError:
        repo = git.Repo.clone_from('https://github.com/CONP-PCNO/conp-dataset',
                                   datasetsdir,
                                   branch='master')

    # Update to latest commit
    origin = repo.remotes.origin
    origin.pull('master')
    repo.submodule_update(recursive=False, keep_going=True)

    d = DataladDataset(path=datasetsdir)
    if not d.is_installed():
        api.clone(source='https://github.com/CONP-PCNO/conp-dataset',
                  path=datasetsdir)
        d = DataladDataset(path=datasetsdir)

    try:
        d.install(path='', recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR  ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO   ] conp-dataset update complete')
    print('[INFO   ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO   ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(source=ds['gitmodule_url'], path=ds['path'])
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print(
                    "[ERROR  ] An exception occurred in datalad install for " +
                    str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        # The following relates to the DATS.json files
        # of the projects directory in the conp-dataset repo.
        # Skip directories that aren't projects.
        patterns = [app.config['DATA_PATH'] + '/conp-dataset/projects/*']
        if not any(
                fnmatch.fnmatch(ds['path'], pattern) for pattern in patterns):
            continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file

        if descriptor == '':
            print("\033[91m")
            print('[ERROR  ] DATS.json file cannot be found in ' + ds['path'] +
                  ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR  ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(
            dataset_id=ds['gitmodule_name']).first()

        # pull the timestamp of the first commit in the git log for the dataset create date
        createDate = datetime.utcnow()
        try:
            createTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct --reverse | head -1".format(
                    ds['path'])).read()
            createDate = datetime.fromtimestamp(int(createTimeStamp))
        except Exception:
            print("[ERROR  ] Create Date couldnt be read.")

        firstMergeDate = datetime.utcnow()
        try:
            firstMergeTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct --reverse {} | head -1".
                format(app.config['DATA_PATH'] + "/conp-dataset",
                       ds['path'])).read()
            firstMergeDate = datetime.fromtimestamp(int(firstMergeTimeStamp))
        except Exception:
            print(
                "[ERROR  ] First merge date of the submodule dataset could not be read."
            )

        # last commit in the git log for the dataset update date
        updateDate = datetime.utcnow()
        try:
            createTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct | head -1".format(
                    ds['path'])).read()
            updateDate = datetime.fromtimestamp(int(createTimeStamp))
        except Exception:
            print("[ERROR  ] Update Date couldnt be read.")

        # get the remote URL
        remoteUrl = None
        try:
            remoteUrl = os.popen(
                "git -C {} config --get remote.origin.url".format(
                    ds['path'])).read()
        except Exception:
            print("[ERROR  ] Remote URL couldnt be read.")

        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = createDate
            dataset.date_added_to_portal = firstMergeDate

        if dataset.date_created != createDate:
            dataset.date_created = createDate

        # check for dataset ancestry
        extraprops = dats.get('extraProperties', [])
        for prop in extraprops:
            if prop.get('category') == 'parent_dataset_id':
                for x in prop.get('values', []):
                    if x.get('value', None) is None:
                        continue
                    datasetAncestry = DBDatasetAncestry()
                    datasetAncestry.id = str(uuid.uuid4())
                    datasetAncestry.parent_dataset_id = 'projects/' + \
                        x.get('value', None)
                    datasetAncestry.child_dataset_id = dataset.dataset_id
                    try:
                        db.session.merge(datasetAncestry)
                        db.session.commit()
                    except exc.IntegrityError:
                        # we already have a record of this ancestry
                        db.session.rollback()

        if not dataset.date_added_to_portal:
            dataset.date_added_to_portal = firstMergeDate

        dataset.date_updated = updateDate
        dataset.fspath = ds['path']
        dataset.remoteUrl = remoteUrl
        dataset.description = dats.get('description',
                                       'No description in DATS.json')
        dataset.name = dats.get('title', os.path.basename(dataset.dataset_id))

        db.session.merge(dataset)
        db.session.commit()

        # if the dataset does not have an ARK identifier yet, generate it
        dataset_with_ark_id_list = [
            row[0] for row in db.session.query(ArkId.dataset_id).all()
        ]
        if dataset.dataset_id not in dataset_with_ark_id_list:
            new_ark_id = ark_id_minter(app, 'dataset')
            save_ark_id_in_database(app, 'dataset', new_ark_id,
                                    dataset.dataset_id)
        print('[INFO   ] ' + ds['gitmodule_name'] + ' updated.')
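The first-commit and last-commit dates above are read by shelling out to git via os.popen; a sketch of the same lookup using GitPython (already imported in this snippet), with the helper name assumed:

import git
from datetime import datetime

def first_commit_date(repo_path, subpath=None):
    # Walk the history oldest-first and take the first commit that touches `subpath`
    repo = git.Repo(repo_path)
    commits = list(repo.iter_commits(paths=subpath or '', reverse=True))
    return datetime.fromtimestamp(commits[0].committed_date) if commits else datetime.utcnow()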
Example #22
def update_face(person_id):
    form = UpdateDatasetForm()
    person = Person.query.get_or_404(person_id)
    datasets = Dataset.query.filter_by(author=person).all()
    print(
        '###############################################################################'
    )
    print(person)
    print(datasets)
    print(
        '###############################################################################'
    )
    if form.validate_on_submit():
        if form.images.data:
            print(
                "##############################################################\n"
            )
            print(form.images.data)
            images = []
            # random_hex = secrets.token_hex(8)
            has_at_least_one_image_with_single_face = False
            for image in form.images.data:
                # TODO: ensure there is exactly one face in the image (if two people appear in
                # the same image, the second person can be misrecognized as the first, because
                # the photo ends up in the first person's id folder, which is scanned first)

                # see if there is any image or not
                if image.mimetype.find("image") == -1:
                    break
                face_image = faces.hasSingleFace(image)
                if face_image is not None:
                    has_at_least_one_image_with_single_face = True
                    image_fn, image_path = save_image_to_dataset(
                        dir_name=str(person_id),
                        form_image_name=image.filename,
                        to_be_saved_image=face_image)
                    dataset = Dataset(image_file=image_fn, author=person)
                    db.session.add(dataset)
                    print(image_path)
                    images.append(image_fn)
            name = form.name.data
            if name != person.name:
                person.name = name
            image_deleted_from_dataset = False
            if form.images_to_be_deleted.data:
                for image_id in form.images_to_be_deleted.data.split(";"):
                    dataset = Dataset.query.get(image_id)
                    path_to_image = './app/static/images/dataset/' + str(
                        person_id) + '/' + dataset.image_file
                    if os.path.exists(path_to_image):
                        image_deleted_from_dataset = True
                        os.remove(path_to_image)
                        db.session.delete(dataset)
            db.session.commit()
            # update dataset_faces.dat if either an image was deleted from dataset or new image was added
            if has_at_least_one_image_with_single_face is True or image_deleted_from_dataset is True:
                faces.make_new_face_encodings()
            flash(f'Successfully updated {form.name.data}.', 'success')
            return redirect(url_for('face', person_id=person_id))
        else:
            flash(f'{form.name.data} not updated.', 'danger')
            return render_template('update_face.html',
                                   title=person.name,
                                   file_select=True,
                                   enableDeletePersonPhoto=True,
                                   datasets=datasets,
                                   form=form)

    elif request.method == 'GET':
        form.name.data = person.name
    return render_template('update_face.html',
                           title=person.name,
                           file_select=True,
                           enableDeletePersonPhoto=True,
                           datasets=datasets,
                           form=form)
Example #23
def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db, config
    from app.models import Dataset as DBDataset
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json

    datasetspath = app.config['DATA_PATH']

    d = DataladDataset(path=datasetspath + '/conp-dataset')
    if not d.is_installed():
        api.clone(source='https://github.com/CONP-PCNO/conp-dataset',
                  path=datasetspath + '/conp-dataset')
        d = DataladDataset(path=datasetspath + '/conp-dataset')
        d.install(path='', recursive=True)

    try:
        d.update(path='', merge=True, recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR  ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO   ] conp-dataset update complete')
    print('[INFO   ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO   ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(source=ds['gitmodule_url'], path=ds['path'])
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print(
                    "[ERROR  ] An exception occurred in datalad install for " +
                    str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file

        if descriptor == '':
            print("\033[91m")
            print('[ERROR  ] DATS.json file cannot be found in ' + ds['path'] +
                  ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR  ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(
            dataset_id=ds['gitmodule_name']).first()
        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = datetime.utcnow()

        dataset.date_updated = datetime.utcnow()
        dataset.fspath = ds['path']
        dataset.description = dats.get('description',
                                       'No description in DATS.json')
        dataset.name = dats.get('title', os.path.basename(dataset.dataset_id))

        db.session.merge(dataset)
        db.session.commit()
        print('[INFO   ] ' + ds['gitmodule_name'] + ' updated.')
Example #24
def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db, config
    from app.models import Dataset as DBDataset
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json
    from pathlib import Path
    import git

    datasetsdir = Path(app.config['DATA_PATH']) / 'conp-dataset'
    datasetsdir.mkdir(parents=True, exist_ok=True)

    # Initialize the git repository object
    try:
        repo = git.Repo(datasetsdir)
    except git.exc.InvalidGitRepositoryError as e:
        repo = git.Repo.clone_from(
            'https://github.com/CONP-PCNO/conp-dataset',
            datasetsdir,
            branch='master'
        )

    # Update to latest commit
    origin = repo.remotes.origin
    origin.pull('master')
    repo.submodule_update(recursive=False, keep_going=True)

    d = DataladDataset(path=datasetsdir)
    if not d.is_installed():
        api.clone(
            source='https://github.com/CONP-PCNO/conp-dataset',
            path=datasetsdir
        )
        d = DataladDataset(path=datasetsdir)

    try:
        d.install(path='', recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR  ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO   ] conp-dataset update complete')
    print('[INFO   ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO   ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(
                    source=ds['gitmodule_url'],
                    path=ds['path']
                )
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print("[ERROR  ] An exception occurred in datalad install for " + str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file

        if descriptor == '':
            print("\033[91m")
            print('[ERROR  ] DATS.json file cannot be found in ' + ds['path'] + ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR  ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(dataset_id=ds['gitmodule_name']).first()
        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = datetime.utcnow()

        dataset.date_updated = datetime.utcnow()
        dataset.fspath = ds['path']
        dataset.description = dats.get('description', 'No description in DATS.json')
        dataset.name = dats.get(
            'title',
            os.path.basename(dataset.dataset_id)
        )

        db.session.merge(dataset)
        db.session.commit()
        print('[INFO   ] ' + ds['gitmodule_name'] + ' updated.')
Example #25
from flask import abort, jsonify
from . import main
from app.models import Dataset
import json
import pydap.client


dataset = Dataset.objects(name='vic_conus_3km').first()
dataset.data = pydap.client.open_url(dataset.url)


@main.route("/")
def hello():
    return "Hello World"


@main.route("/<api_key>/<location>", methods=['GET'])
def opendap_json(api_key=None, location=None):
    try:
        loc = location.split(',')
    except:
        abort(404)
    try:
        lat = float(loc[0])
        lon = float(loc[1])
    except:
        abort(404)
    if lat and lon:
        location = json.loads(dataset.get_xy(lat=lat, lon=lon))
        # return the extracted location as JSON
        return jsonify(location)
    else:
        return "error: provide latitude and longitude"
Example #26
def create(request):
    if request.method == 'POST':
        dataset = Dataset()
        dataset.owner = request.user
        dataset.name = request.POST['name']
        dataset.number_of_labels = request.POST['number_of_labels']
        dataset.description = request.POST['description']

        if not dataset.privacy_validation(request.POST['privacy']):
            return render(request, '400.html', status=400)

        dataset_file = request.FILES['dataset']
        # NOTE: the encoding kwarg assumes a csv reader that supports it (e.g. unicodecsv)
        reader = csv.reader(dataset_file, encoding='utf-8')
        header_list = next(reader)

        label_name = request.POST.get('label_name', 'CLASS')
        append_label_column = request.POST.get('append_label_column', False)
        if not append_label_column:
            label_index = header_list.index(label_name)
            header_list.pop(label_index)

        header_list.append(label_name)
        dataset.header = csvlist_to_string(header_list).strip()
        dataset.save()

        samples_count = 0
        for row_list in reader:
            samples_count += 1
            if not append_label_column:
                label_string = row_list.pop(label_index)

            row = csvlist_to_string(row_list).strip()
            sample = Sample(dataset=dataset, data=row,
                            original_index=samples_count)
            sample.save()

            if not append_label_column and label_string:
                label = Label(owner=request.user, sample=sample,
                              label=label_string)
                label.save()
                sample.times_labeled = 1
                sample.save()

        dataset.number_of_samples = samples_count
        dataset.save()

        return HttpResponseRedirect(
            reverse('datasets_show', args=(dataset.id,)))
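csvlist_to_string is a helper assumed by this view; a minimal sketch using the standard csv module:

import csv
import io

def csvlist_to_string(row_list):
    # Serialize a single row back into a CSV-formatted string
    buffer = io.StringIO()
    csv.writer(buffer).writerow(row_list)
    return buffer.getvalue()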
Example #27
def load_dataset(directory=None):
    """Page to load new datasets from within HOST."""
    data_dir = os.path.join(current_app.config['ABS_PATH'],
                            'static/datasets/preloaded')

    # All raters
    all_raters = request.args.get('all_raters', 0, type=int)

    # Choices of directories to load in form
    dir_choices = [
        d for d in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, d))
    ]
    dir_choices.sort()

    info = {'directory': directory, 'new_imgs': 0}
    if directory is not None:
        # Save useful info for jinja template
        info['model'] = Dataset.query.filter_by(name=directory).first()
        # Check access before loading files
        if info['model']:
            info['access'] = current_user.has_access(info['model'])
            info['saved_imgs'] = info['model'].images.count()
        else:
            info['access'] = True
            info['saved_imgs'] = 0

        if info['access']:
            # Count the files in the directory
            all_files = []
            for root, _, files in os.walk(os.path.join(data_dir, directory)):
                all_files.extend([
                    os.path.join(root, f) for f in files
                    if not f.startswith('.')
                ])
            info['new_imgs'] = len(all_files) - info['saved_imgs']

    form = LoadDatasetForm()
    form.dir_name.choices = dir_choices

    # Form submission must be restricted by access in template
    if form.validate_on_submit():
        if info['model']:
            new_dataset = False
        else:
            # If dataset is not a Dataset Model (does not exist), create it
            info['model'] = Dataset(name=form.dir_name.data,
                                    creator=current_user)
            db.session.add(info['model'])
            new_dataset = True
            flash(f"{info['model'].name} was created as an OPEN dataset",
                  'info')

        if len(all_files) > 10:
            current_user.launch_task('load_data',
                                     f"Loading {info['new_imgs']} new images "
                                     f"to {info['model'].name} dataset...",
                                     icon='load',
                                     alert_color='primary',
                                     files=all_files,
                                     dataset_name=info['model'].name,
                                     new_dataset=new_dataset,
                                     ignore_existing=True)
            db.session.commit()

        else:
            try:
                # Function returns number of uploaded images
                loaded_imgs = load_data(all_files,
                                        dataset=info['model'],
                                        host=True,
                                        new_dataset=new_dataset)

            except OrphanDatasetError:
                # If orphaned dataset, delete it
                # TODO this somehow throws an error; look into this
                # db.session.delete(info['model'])
                flash(
                    'No new files were successfully loaded, '
                    'leaving the dataset empty', 'warning')

            else:
                if not new_dataset and loaded_imgs == 0:
                    flash('No new files were successfully loaded', 'warning')
                else:
                    flash(f'{loaded_imgs} file(s) successfully loaded!',
                          'success')
            finally:
                # Commit changes in database
                db.session.commit()

        return redirect(url_for('main.dashboard', all_raters=all_raters))

    # Form validation errors
    for _, error in form.errors.items():
        flash(error[0], 'danger')

    return render_template('data/load_dataset.html',
                           form=form,
                           title="Load Dataset",
                           dictionary=info,
                           all_raters=all_raters)
Example #28
def upload_dataset():
    """Page to upload new dataset of MRI."""
    data_dir = os.path.join(current_app.config['ABS_PATH'],
                            'static/datasets/uploaded')

    # All raters
    all_raters = request.args.get('all_raters', 0, type=int)

    form = UploadDatasetForm()
    if form.validate_on_submit():
        files = request.files.getlist(form.dataset.name)
        savedir = os.path.join(data_dir, form.dataset_name.data)

        # If savedir does not exist; create it
        if not os.path.isdir(savedir):
            os.makedirs(savedir)

        # Form checks that dataset does not exist already
        # so there is no need to check here; just create it
        dataset = Dataset(name=form.dataset_name.data,
                          creator=current_user,
                          private=form.privacy.data)
        dataset.grant_access(current_user)
        db.session.add(dataset)

        privacy = 'a PRIVATE' if dataset.private else 'an OPEN'
        flash(f"{dataset.name} was created as {privacy} dataset", 'info')

        if len(files) > 10:
            # Redis can't handle FileStorage
            # First upload all files
            files_uploaded = upload_data(files, savedir)
            current_user.launch_task('load_data',
                                     f'Uploading {len(files)} new images '
                                     f'to {dataset.name} dataset...',
                                     icon='upload',
                                     alert_color='primary',
                                     files=files_uploaded,
                                     dataset_name=dataset.name,
                                     new_dataset=True)
            db.session.commit()
        else:
            try:
                # Function returns number of uploaded images
                loaded_imgs = load_data(files, dataset, savedir=savedir)
            except OrphanDatasetError:
                # If orphaned dataset, delete it
                db.session.delete(dataset)
            else:
                # If not, that means at least one image was uploaded
                # flash success with number of uploads
                flash(f'{loaded_imgs} file(s) successfully loaded!', 'success')
            finally:
                # Commit changes in database
                db.session.commit()

        return redirect(url_for('main.dashboard', all_raters=all_raters))
    for _, error in form.errors.items():
        flash(error[0], 'danger')
    return render_template('data/upload_dataset.html',
                           form=form,
                           all_raters=all_raters,
                           title='Upload Dataset')
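upload_data and load_data are helpers assumed by this view; a plausible sketch of upload_data, saving each FileStorage to the target directory (name and signature assumed):

import os
from werkzeug.utils import secure_filename

def upload_data(files, savedir):
    # Save each uploaded FileStorage to disk and return the list of saved paths
    saved_paths = []
    for file_storage in files:
        path = os.path.join(savedir, secure_filename(file_storage.filename))
        file_storage.save(path)
        saved_paths.append(path)
    return saved_paths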
Example #29
def upload_dataset():
    user = User.query.filter_by(UserAccountId=current_user.get_id()).first()

    form = DatasetForm()

    if form.submit.data and form.validate_on_submit():

        odesc = Dataset.query.order_by(Dataset.OfferingId.desc()).first()
        if not odesc:
            o_id = 1
        else:
            o_id = odesc.OfferingId + 1

        o_type = int(request.form['offering_type'])
        df_ext = form.dataset_upload.data.filename.rsplit('.', 1)[1]
        df_name = secure_filename(
            str(user.UserAccountId) + '-' + str(o_id) + '-df.' + df_ext)
        df_path = os.path.join(app.root_path, 'dataset-files', df_name)
        form.dataset_upload.data.save(
            os.path.join(app.root_path, 'dataset-files', df_name))

        if form.sample_upload.data:
            sf_ext = form.sample_upload.data.filename.rsplit('.', 1)[1]
            sf_name = secure_filename(
                str(user.UserAccountId) + '-' + str(o_id) + '-sf.' + sf_ext)
            sf_path = os.path.join(app.root_path, 'sample-files', sf_name)
            form.sample_upload.data.save(
                os.path.join(app.root_path, 'sample-files', sf_name))
        else:
            sf_name = None
            sf_path = None

        lf_ext = form.license_upload.data.filename.rsplit('.', 1)[1]
        lf_name = secure_filename(
            str(user.UserAccountId) + '-' + str(o_id) + '-lf.' + lf_ext)
        lf_path = os.path.join(app.root_path, 'license-files', lf_name)
        form.license_upload.data.save(
            os.path.join(app.root_path, 'license-files', lf_name))

        if o_type == 1:
            uf = 0
        elif o_type == 2:
            uf = int(form.update_frequency.data)

        dataset = Dataset(OfferingName=form.offering_name.data, OfferingType=o_type, Category=form.category.data,
                          Description=form.description.data, Region=form.region.data, DateCreated=form.date_created.data,
                          DateLastUpdated=form.last_updated.data, UpdateFrequency=uf, DataFileName=df_name, DataFilePath=df_path,
                          SampleFileName=sf_name, SampleFilePath=sf_path, LicenseFileName=lf_name, LicenseFilePath=lf_path,
                          Price=form.price.data, owner=user)
        db.session.add(dataset)
        db.session.commit()

        # new_row = [form.offering_name.data, user.UserName, form.category.data, form.region.data, o_type, uf, form.description.data, form.price.data]
        # train_file = os.path.join(app.root_path, 'train.csv')
        # with open(train_file,'a') as fd:
        #     writer = csv.writer(fd, lineterminator='\n')
        #     writer.writerow(new_row)

        return redirect(url_for('my_offerings'))

    if form.calc.data and form.offering_name.data and form.description.data and form.region.data:

        o_type = int(request.form['offering_type'])
        if o_type == 1:
            uf = 0
        elif o_type == 2:
            uf = int(form.update_frequency.data)

        data = {
            'name': [form.offering_name.data],
            'seller': [user.UserName],
            'category': [form.category.data],
            'region': [form.region.data],
            'offering_type': [o_type],
            'update_frequency': [uf],
            'description': [form.description.data],
            'price': [0]
        }
        df = pd.DataFrame(data)
        train_file = os.path.join(app.root_path, 'train.csv')
        sug_price = calc_price(df, train_file)
        form.suggested_price.data = 'Suggested Price: $' + str(
            round(sug_price, 2))

    elif form.calc.data:
        form.suggested_price.data = ''
        flash(
            'Name, Region and Description are required for suggesting price!')

    return render_template('dataset-upload.html',
                           title='Upload Data',
                           form=form)
Example #30
def create(request):
    if request.method == 'POST':
        dataset = Dataset()
        dataset.owner = request.user
        dataset.name = request.POST['name']
        dataset.number_of_labels = request.POST['number_of_labels']
        dataset.description = request.POST['description']

        if not dataset.privacy_validation(request.POST['privacy']):
            return render(request, '400.html', status=400)

        dataset_file = request.FILES['dataset']
        # NOTE: the encoding kwarg assumes a csv reader that supports it (e.g. unicodecsv)
        reader = csv.reader(dataset_file, encoding='utf-8')
        header_list = next(reader)

        label_name = request.POST.get('label_name', 'CLASS')
        append_label_column = request.POST.get('append_label_column', False)
        if not append_label_column:
            label_index = header_list.index(label_name)
            header_list.pop(label_index)

        header_list.append(label_name)
        dataset.header = csvlist_to_string(header_list).strip()
        dataset.save()

        samples_count = 0
        for row_list in reader:
            samples_count += 1
            if not append_label_column:
                label_string = row_list.pop(label_index)

            row = csvlist_to_string(row_list).strip()
            sample = Sample(dataset=dataset,
                            data=row,
                            original_index=samples_count)
            sample.save()

            if not append_label_column and label_string:
                label = Label(owner=request.user,
                              sample=sample,
                              label=label_string)
                label.save()
                sample.times_labeled = 1
                sample.save()

        dataset.number_of_samples = samples_count
        dataset.save()

        return HttpResponseRedirect(
            reverse('datasets_show', args=(dataset.id, )))