Ejemplo n.º 1
0
def set_current_user_from_x_forwarded():
    """Check the headers X-Forwarded-User and X-Forwarded-Email from Oauth2 Proxy, and use them to load the user.
    If the user does not exist, create it.

    Important: If there is no header name, we load the default user from the configuration. Don't set it in Production.

    Side effect: stores the resolved user (or None) in ``flask.g.current_user``.
    """
    import taiga2.controllers.models_controller as mc

    request = flask.request
    config = flask.current_app.config
    user = None

    # Used for the development environment only
    default_user_email = config.get("DEFAULT_USER_EMAIL", None)

    # Header *values* set by the OAuth2 proxy in production
    # (renamed from *_header_name: they hold values, not header names)
    forwarded_user = request.headers.get("X-Forwarded-User", None)
    forwarded_email = request.headers.get("X-Forwarded-Email", None)

    if forwarded_email is not None:
        try:
            user = mc.get_user_by_email(forwarded_email)
        except NoResultFound:
            # User does not exist yet, so create it using the local part
            # of the email address as the user name
            username = forwarded_email.split("@")[0]
            user = mc.add_user(name=username, email=forwarded_email)
            log.debug("We just created the user {} with email {}".format(
                username, forwarded_email))
            log.debug("Check of the user name ({}) and email ({})".format(
                user.name, user.email))
        log.debug(
            "Looked up header user_email %s to find username: %s",
            forwarded_email,
            user.id,
        )
    elif forwarded_user is not None:
        user = mc.get_user_by_name(forwarded_user)
        log.debug(
            f"Looked up header user_name {forwarded_user} to find user with id: {user.id}"
        )

    if user is None and default_user_email is not None:
        # Fix: use the logger instead of a bare print() so this message goes
        # through the configured logging setup like every other message here
        log.debug(
            "We did not find the user from the headers, loading the default user by its email {}"
            .format(default_user_email))

        try:
            user = mc.get_user_by_email(default_user_email)
        except NoResultFound:
            # Default user does not exist yet: create it with a random name
            user = mc.add_user(name=str(uuid.uuid4()),
                               email=default_user_email)

    flask.g.current_user = user
    return None
Ejemplo n.º 2
0
def test_view_not_owned(session: SessionBase):
    """A folder created by a different user only grants view rights."""
    current = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = current

    other_user = mc.add_user("test_useless", "*****@*****.**")

    # Folder whose creator is the other user, not the current one
    folder = mc.add_folder(name="folder_not_owned",
                           folder_type=Folder.FolderType.folder,
                           description="")
    folder.creator = other_user

    assert mc.get_rights(folder.id) == EntryRightsEnum.can_view
Ejemplo n.º 3
0
def test_remove_group_user_association_not_in_admin_group(
        session: SessionBase):
    """Removing a user from a group is rejected when the caller is not admin."""
    member = mc.add_user("test", "*****@*****.**")
    fresh_group = mc.add_group("test")

    # Current user is not in the Admin group, so the call must assert
    with pytest.raises(AssertionError):
        mc.remove_group_user_associations(fresh_group.id, [member.id])
Ejemplo n.º 4
0
def test_remove_group_user_association_in_admin_group(
        session: SessionBase, current_user_in_admin_group):
    """An admin can add a user to a group and then remove them again."""
    member = mc.add_user("test", "*****@*****.**")
    fresh_group = mc.add_group("test")

    updated = mc.add_group_user_associations(fresh_group.id, [member.id])
    updated = mc.remove_group_user_associations(updated.id, [member.id])

    assert member not in updated.users
Ejemplo n.º 5
0
def test_edit_owned(session: SessionBase):
    """The creator of a folder gets edit rights on it."""
    creator = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = creator

    owned_folder = mc.add_folder(name="Test folder",
                                 folder_type=Folder.FolderType.folder,
                                 description="")

    assert mc.get_rights(owned_folder.id) == EntryRightsEnum.can_edit
Ejemplo n.º 6
0
def set_current_user_from_bearer_token():
    """Use the header Authorization to authenticate the user. If we don't find it, we create it.
    The token is a UUID generated by the first Flask app to receive a new user.

    Important: If no Authorization header is passed, we use the DEFAULT_USER_EMAIL from the configuration settings

    Side effect: stores the resolved user in ``flask.g.current_user``.
    Aborts with 401 on an unknown token and 403 when neither a token nor a
    default user is available."""
    import taiga2.controllers.models_controller as mc

    request = flask.request
    config = flask.current_app.config
    user = None
    bearer_token = request.headers.get("Authorization", None)
    default_user_email = config.get("DEFAULT_USER_EMAIL", None)

    # Fix: dropped the redundant `user is None and` — user was just set to None
    if bearer_token is not None:
        # Header must look like "Bearer <token>" (raw string for the regex)
        m = re.match(r"Bearer (\S+)", bearer_token)
        if m is not None:
            token = m.group(1)
            user = bearer_token_lookup(token)
            if not user:
                # If we did not find the user, we return unauthorized
                flask.abort(401)
            log.debug("Got token %s which mapped to user %s", token,
                      user.email)
        else:
            # NOTE(review): a malformed header falls through with user=None
            # instead of aborting — confirm this is intentional
            log.warning("Authorization header malformed: %s", bearer_token)
    else:
        # TODO: Should ask for returning a "Not authenticated" page/response number
        if default_user_email is not None:
            log.critical(
                "DEFAULT_USER_EMAIL is set in config, using that when accessing API"
            )
            try:
                user = mc.get_user_by_email(default_user_email)
            except NoResultFound:
                # Default user does not exist yet: create it with a random name
                user = mc.add_user(name=str(uuid.uuid4()),
                                   email=default_user_email)
        else:
            log.critical(
                "A request without authentication has been received. Rejecting."
            )
            # raise Exception("No user passed")
            flask.abort(403)
    flask.g.current_user = user
Ejemplo n.º 7
0
def populate_db(dataset_csv_path, dataset_version_with_datafile_csv_path):
    """Import datasets, datafiles and dataset versions from two CSV exports.

    Args:
        dataset_csv_path: CSV with one row per dataset; columns used:
            name, permaname, description, folder.
        dataset_version_with_datafile_csv_path: CSV with one row per datafile;
            columns used: type, name, s3_location, short_desc, long_desc, id,
            created_timestamp, version, created_by, permaname.

    Users and folders referenced by the rows are created on demand, and
    ``flask.g.current_user`` is switched per row so entities are created
    under the right owner. A summary of created/skipped entities is printed
    at the end.
    """
    # TODO: We should handle the Public folder properly, instead of adding it to Philip's account
    # Summary
    nb_user_created = 0
    nb_user_skipped = 0
    nb_dataset_created = 0
    nb_row_dataset_skipped = 0
    nb_datafile_created = 0
    nb_datafile_skipped = 0
    nb_row_datafile_skipped = 0
    nb_dataset_version_created = 0
    nb_dataset_version_skipped = 0

    # Dictionary to link find the dataset matching the dataset via the permanames to create the dataset versions
    # Dict<String, Array<int>>
    dict_permaname_datafile_ids = {}

    # We first manage the dataset creation
    with open(dataset_csv_path) as dataset_file:
        print("Creating the users and the datasets")
        reader = csv.DictReader(dataset_file)

        for row in reader:
            is_public = False

            # Rows without a permaname cannot be linked to datafiles later
            if not row["permaname"]:
                print(
                    "Warning: We found an empty permaname entry: {}. Skipping it."
                    .format(row))
                nb_row_dataset_skipped += 1
                continue

            dataset_name = row["name"]
            dataset_permaname = row["permaname"]
            dataset_description = row["description"]

            # Folder column format for user folders: "<folder_name>(<user_email>)"
            if row["folder"].startswith("home"):
                dataset_folder_user = row["folder"]

                # To get the user from dataset_folder_user, we extract the user from the parenthesis
                dataset_user_email = dataset_folder_user[
                    dataset_folder_user.find("(") +
                    1:dataset_folder_user.find(")")]

                # Handle the case where user email is None
                if dataset_user_email == "None":
                    print(
                        "Warning: We found a row with folder {}. Skipping it.".
                        format(row["folder"]))
                    nb_user_skipped += 1
                    continue

                # To get the target folder, we take the string before the parenthesis
                dataset_folder_name = dataset_folder_user.split("(")[0]
            else:
                # For now, we store all the others into [email protected]
                is_public = True
                dataset_folder_name = row["folder"]
                dataset_user_email = "*****@*****.**"

            # Setting up the user
            try:
                dataset_current_user = models_controller.get_user_by_email(
                    dataset_user_email)
            except NoResultFound:
                # User does not exists yet, so we create it
                # (name is the local part of the email address)
                dataset_user_name = dataset_user_email[:dataset_user_email.
                                                       find("@")]
                dataset_current_user = models_controller.add_user(
                    name=dataset_user_name, email=dataset_user_email)
                print("User with email: {} created".format(dataset_user_email))
                nb_user_created += 1

            # Act as this user for the creations below
            flask.g.current_user = dataset_current_user

            # TODO: We should not create the dataset if it already exists
            new_dataset = models_controller.add_dataset(
                name=dataset_name,
                permaname=dataset_permaname,
                description=dataset_description,
            )
            try:
                # TODO: Check it is case insensitive
                if str.lower(dataset_folder_name) == "home":
                    dataset_folder = dataset_current_user.home_folder
                elif str.lower(dataset_folder_name) == "trash":
                    dataset_folder = dataset_current_user.trash_folder
                else:
                    dataset_folder = models_controller.get_folder_by_name(
                        dataset_folder_name)
            except NoResultFound:
                # If no result, it means we need to create the folder in the user space or in public
                dataset_folder = models_controller.add_folder(
                    name=dataset_folder_name,
                    folder_type=models_controller.Folder.FolderType.folder,
                    description=None,
                )

                if is_public:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=models_controller.get_public_folder().
                        id,
                    )
                else:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=dataset_current_user.home_folder_id,
                    )

            # Now we can move the dataset to the folder
            models_controller.move_to_folder([new_dataset.id], None,
                                             dataset_folder.id)

            # We add the dataset_permaname as key with value an empty array so we can add each matching datafile
            dict_permaname_datafile_ids[dataset_permaname] = []

            nb_dataset_created += 1

    # We then manage the attribution of the dataset_version to the freshly created datasets
    with open(dataset_version_with_datafile_csv_path
              ) as dataset_version_with_datafile_csv:
        print("")
        print("Creating the datafiles")
        reader = csv.DictReader(dataset_version_with_datafile_csv)

        for row in reader:
            if not row["permaname"]:
                print("We found an empty permaname entry: {}. Skipping it.".
                      format(row))
                nb_row_datafile_skipped += 1
                nb_datafile_skipped += 1
                continue

            # We first create the datafiles
            datafile_type = row["type"]
            datafile_name = row.get("name", "data")
            datafile_s3_location = urlparse(row["s3_location"])
            datafile_short_summary = row["short_desc"]
            datafile_long_summary = row.get("long_desc", "")
            datafile_id = row["id"]
            datafile_creation_date = row["created_timestamp"]
            datafile_version = row["version"]
            datafile_created_by = row["created_by"]

            dataset_permaname = row["permaname"]

            # s3://taiga2/imported/4bb2169e-5b87-4d1c-a78e-3e6006316561.hdf5
            datafile_s3_bucket = datafile_s3_location.netloc
            datafile_s3_key = datafile_s3_location.path[
                1:]  # We remove the first '/'

            # Set the user to the one in the row to make the manipulations under his name
            try:
                current_user = models_controller.get_user_by_email(
                    datafile_created_by)
            except NoResultFound:
                print(
                    "Warning: The user email found in 'created_by' column ({}) was not found in the dataset side. "
                    "Creating one.".format(datafile_created_by))
                datafile_created_by_name = datafile_created_by[:
                                                               datafile_created_by
                                                               .find("@")]
                current_user = models_controller.add_user(
                    name=datafile_created_by_name, email=datafile_created_by)
                nb_user_created += 1

            flask.g.current_user = current_user

            # TODO: We should not create the datafile if it already exists: ie s3_bucket/s3_key exists
            new_datafile = models_controller.add_s3_datafile(
                s3_bucket=datafile_s3_bucket,
                s3_key=datafile_s3_key,
                name=datafile_name,
                type=datafile_type,
                short_summary=datafile_short_summary,
                long_summary=datafile_long_summary,
            )

            # We register the datafile with its permaname dataset to later create the dataset version
            # with all the datafiles
            if dataset_permaname in dict_permaname_datafile_ids:
                datafile_info = DataFileInfo(
                    id=datafile_id,
                    datafile=new_datafile,
                    version=datafile_version,
                    creation_date=datafile_creation_date,
                    owner_email=datafile_created_by,
                )
                dict_permaname_datafile_ids[dataset_permaname].append(
                    datafile_info)
            else:
                print(
                    "Warning: We found a dataset ({}) without a matching dataset ({}). Skipping it."
                    .format(datafile_id, dataset_permaname))
                nb_datafile_skipped += 1
                continue

            nb_datafile_created += 1

    # Then we create the dataset_version with the taiga id, linking with the dataset using its permaname
    print("")
    print("Linking the datafiles with the datasets")
    for dataset_permaname, array_data_file_info in dict_permaname_datafile_ids.items(
    ):
        dataset = models_controller.get_dataset_from_permaname(
            dataset_permaname)

        # Get the creation date from the first dataset_version
        for datafile_info in array_data_file_info:
            # Create each version under its original owner
            flask.g.current_user = models_controller.get_user_by_email(
                datafile_info.owner_email)
            # TODO: We should not create the dataset_version if it already exists. ie version already exists for this dataset
            dataset_version = models_controller.add_dataset_version(
                dataset_id=dataset.id,
                datafiles_ids=[datafile_info.datafile.id],
                anterior_creation_date=datafile_info.creation_date,
                forced_id=datafile_info.id,
            )

            # Then we edit the dataset version creation_date to the
            if int(datafile_info.version) == 1:
                models_controller.update_dataset_creation_date(
                    dataset_id=dataset.id,
                    new_date=datafile_info.creation_date)

        nb_dataset_version_created += 1

    print("")
    print("Done! Here is the summary:")
    print("\tLines skipped in dataset file: {}".format(nb_row_dataset_skipped))
    print(
        "\tLines skipped in datafile file: {}".format(nb_row_datafile_skipped))
    print("")
    print("\tDatasets created: {}".format(nb_dataset_created))
    print("\tUsers created: {}".format(nb_user_created))
    print("\tUsers skipped: {}".format(nb_user_skipped))
    print("")
    print("\tDatafiles created: {}".format(nb_datafile_created))
    print("\tDatafiles skipped: {}".format(nb_datafile_skipped))
    print("")
    print("\tDatasetVersions created: {}".format(nb_dataset_version_created))
    print("\tDatasetVersions skipped and datasets cleaned: {}".format(
        nb_dataset_version_skipped))
Ejemplo n.º 8
0
def new_user():
    """Create and return a fresh user record."""
    return models_controller.add_user(name="new user",
                                      email="*****@*****.**")
Ejemplo n.º 9
0
def user_id(db):
    """Create the test user and return its database id."""
    created = mc.add_user(TEST_USER_NAME, TEST_USER_EMAIL)
    return created.id
def create_db_and_populate():
    """Create the database schema and fill it with a small fixture tree.

    Creates an admin user (added to the "Admin" group) and, under its home
    folder: an "origin" dataset, "Folder A" containing "Folder B" containing
    a "Data" dataset. Three extra dataset versions (A1..A3) are then added to
    the "origin" dataset, each with freshly generated datafiles, and finally a
    sample dataset with permaname "sample-1" is created in the public folder.
    """
    create_db()

    admin_group = models_controller.get_group_by_name("Admin")

    # Create the Admin user and put it in the Admin group
    admin_user = models_controller.add_user(name="admin",
                                            email="*****@*****.**",
                                            token="test-token")
    admin_group.users.append(admin_user)
    home_folder_admin = admin_user.home_folder

    # All subsequent controller calls act on behalf of the admin user
    flask.g.current_user = admin_user

    # Create a session where all this is happening
    upload_session_origin = models_controller.add_new_upload_session()

    # Create the origin data
    upload_session_file_origin = models_controller.add_upload_session_s3_file(
        session_id=upload_session_origin.id,
        filename="origin",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="x",
        encoding="UTF-8",
    )

    origin_dataset = models_controller.add_dataset_from_session(
        session_id=upload_session_origin.id,
        dataset_name="origin",
        dataset_description="No description",
        current_folder_id=home_folder_admin.id,
    )

    # Create the Folder A folder
    folderA = models_controller.add_folder(
        name="Folder A",
        folder_type=models.Folder.FolderType.folder,
        description="desc")
    models_controller.add_folder_entry(folder_id=home_folder_admin.id,
                                       entry_id=folderA.id)

    # Create Folder B inside Folder A
    folderB = models_controller.add_folder(
        name="Folder B",
        folder_type=models.Folder.FolderType.folder,
        description="")
    models_controller.add_folder_entry(folder_id=folderA.id,
                                       entry_id=folderB.id)

    # Create Data inside Folder B
    upload_session_data = models_controller.add_new_upload_session()
    upload_session_file_data = models_controller.add_upload_session_s3_file(
        session_id=upload_session_data.id,
        filename="Data",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="y",
        encoding="UTF-8",
    )

    data = models_controller.add_dataset_from_session(
        session_id=upload_session_data.id,
        dataset_name="Data",
        dataset_description="No description",
        current_folder_id=folderB.id,
    )

    data_datafiles = get_latest_version_datafiles_from_dataset(data.id)

    temp_data_datafiles = copy.copy(data_datafiles)

    # Create A1 Data/A2 Data/A3 Data inside Folder A
    for i in range(1, 4):
        name = "".join(["A", str(i), " DatasetVersion"])

        # Generate new datafiles derived from the previous iteration's.
        # Fix: removed the `if i >= 1` guard, which was always true for
        # i in range(1, 4) and therefore dead code.
        loop_datafiles = []
        for datafile in temp_data_datafiles:
            loop_datafile = models_controller.add_s3_datafile(
                name=datafile.name + "v" + str(i),
                s3_bucket=bucket_name,
                s3_key=models_controller.generate_convert_key(),
                compressed_s3_key=models_controller.generate_compressed_key(),
                type=datafile.format,
                encoding="UTF-8",
                short_summary="short summary",
                long_summary="long_summary",
            )
            loop_datafiles.append(loop_datafile)
        temp_data_datafiles = loop_datafiles

        datafiles_id = [datafile.id for datafile in temp_data_datafiles]
        dataAX = models_controller.add_dataset_version(
            dataset_id=origin_dataset.id, datafiles_ids=datafiles_id)

    # create a sample dataset in a known location with a known permaname
    create_sample_dataset(forced_permaname="sample-1", folder_id="public")