Esempio n. 1
0
def get_indeed_jobs(limit):
    """Scrape paginated Indeed search results and store each listing.

    Walks result pages in steps of 10 until `limit` is reached, inserting
    one models.Job row per card. Duplicate job ids (IntegrityError on
    commit) are printed, counted, and rolled back; the final count of
    skipped rows is printed at the end.
    """
    url = "https://www.indeed.com/jobs?q=software+engineer+intern&l=San+Ramon%2C+CA"
    start = 0
    excluded = 0

    while start < limit:
        soup = BeautifulSoup(requests.get(url).content, 'html.parser')
        results = soup.find(id='resultsCol')

        for listing in results.find_all('div', class_='result'):
            ijob_id = listing.get('data-jk')
            job_url = url + ijob_id
            title = listing.find('a', class_="jobtitle").text.replace("\n", "")

            # Location is optional on some result cards.
            loc_tag = listing.find('span', class_="location")
            location = "" if loc_tag is None else loc_tag.text

            # Company name may appear as an anchor or, failing that, a span.
            comp_tag = listing.find('a', {"data-tn-element": "companyName"})
            if comp_tag is None:
                comp_tag = listing.find('span', class_="company")
            company = comp_tag.text.replace("\n", "")

            db.add(models.Job(job_id=ijob_id,
                              title=title,
                              company=company,
                              location=location,
                              url=job_url,
                              source="Indeed"))
            try:
                db.commit()
            except exc.IntegrityError:
                # Duplicate job_id: report it, count it, undo the insert.
                print(ijob_id)
                excluded += 1
                db.rollback()

        url = url_incr(url, start)
        start += 10

    print(excluded)
Esempio n. 2
0
def get_monster_jobs(end):
    """Scrape one Monster search results page and store each listing.

    Every result card becomes a models.Job row; cards missing any
    expected element (AttributeError during parsing) are skipped, as are
    duplicate job ids (IntegrityError on commit).
    """
    url = ("https://www.monster.com/jobs/search/"
           "?q=software-engineer-intern&where=94583&stpage=1&page=") + str(end)
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    container = soup.find(id="ResultsContainer")

    for card in container.find_all('section', class_="card-content"):
        try:
            mjob_id = card.get('data-jobid')
            job_url = url + '&jobid=' + str(mjob_id)

            title_anchor = card.find('h2', class_='title').find('a')
            title = title_anchor.text.replace('\n', "")

            company_span = card.find('div', class_="company").find(
                'span', class_="name")
            company = company_span.text.replace("\n", "")

            location_span = card.find('div', class_="location").find(
                'span', class_="name")
            location = location_span.text.replace("\n", "")

            db.add(models.Job(job_id=mjob_id,
                              title=title,
                              company=company,
                              location=location,
                              url=job_url,
                              source="Monster"))
            try:
                db.commit()
            except exc.IntegrityError:
                # Duplicate job_id: undo the insert and move on.
                db.rollback()
                continue
        except AttributeError:
            # Card did not have the expected structure; skip it entirely.
            continue
Esempio n. 3
0
    def _transaction(
        self,
        job: Dict[str, Any],
        pipeline_run_spec: Dict[str, Any],
        pipeline_definitions: List[Dict[str, Any]],
        pipeline_run_ids: List[str],
    ) -> Dict[str, Any]:
        """Add a job and its pipeline runs to the current db session.

        One NonInteractivePipelineRun (plus PENDING PipelineRunStep rows)
        is created per entry of `pipeline_definitions`/`pipeline_run_ids`
        (zipped pairwise). Nothing is committed here; rows are only added
        to and flushed on `db.session`. The pipelines to launch are
        stashed in `self.collateral_kwargs` for the collateral effect
        function to act on after the transaction.

        Returns:
            The `job` dict, augmented with a "pipeline_runs" list of the
            run records that were created.
        """

        db.session.add(models.Job(**job))
        # So that the job can be returned with all runs.
        job["pipeline_runs"] = []
        # To be later used by the collateral effect function.
        tasks_to_launch = []

        for pipeline_definition, id_ in zip(pipeline_definitions,
                                            pipeline_run_ids):
            # Note: the pipeline definition contains the parameters of
            # the specific run.
            pipeline_run_spec["pipeline_definition"] = pipeline_definition
            pipeline = construct_pipeline(**pipeline_run_spec)

            # Specify the task_id beforehand to avoid race conditions
            # between the task and its presence in the db.
            task_id = str(uuid.uuid4())
            tasks_to_launch.append((task_id, pipeline))

            non_interactive_run = {
                "job_uuid": job["job_uuid"],
                "run_uuid": task_id,
                "pipeline_run_id": id_,
                "pipeline_uuid": pipeline.properties["uuid"],
                "project_uuid": job["project_uuid"],
                "status": "PENDING",
            }
            db.session.add(
                models.NonInteractivePipelineRun(**non_interactive_run))
            # Need to flush because otherwise the bulk insertion of
            # pipeline steps will lead to foreign key errors.
            # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
            db.session.flush()

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline
            # steps that will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.PipelineRunStep(
                        **{
                            "run_uuid": task_id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)

            non_interactive_run["pipeline_steps"] = pipeline_steps
            job["pipeline_runs"].append(non_interactive_run)

        self.collateral_kwargs["job"] = job
        self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
        self.collateral_kwargs["pipeline_run_spec"] = pipeline_run_spec

        return job
Esempio n. 4
0
    # NOTE(review): this span appears to be the body of an enclosing loop —
    # `comName`, `fake`, and `data` are defined outside this excerpt; confirm
    # against the full file.
    # Create a fake user (name split into first/last) with a bcrypt-hashed
    # placeholder password.
    name = fake.name().split()
    u = models.User(first_name=name[0],
                    last_name=name[1],
                    email=fake.email(),
                    password=bcrypt.hashpw('heythere'.encode('UTF_8'),
                                           bcrypt.gensalt(14)))
    # One profile and one company owned by that user.
    p = models.Profile(user=u)
    c = models.Company(owner=u, name=comName, address=fake.address())
    db.session.add(u)
    db.session.add(p)
    db.session.add(c)
    db.session.commit()
    # Seed the company's jobs; a NaN entry presumably marks the end of this
    # company's job list in the data table — TODO confirm.
    for job in data[comName]:
        if pd.isnull(job):
            break
        db.session.add(models.Job(title=job, company=c))
    db.session.commit()

# for i in range(data[0,:].size):
#     name = fake.name().split()
#     u = models.User(first_name=name[0],last_name=name[1],email=fake.email(),password=bcrypt.hashpw('heythere'.encode('UTF_8'),bcrypt.gensalt(14)))
#     p = models.Profile(user=u)
#     c = models.Company(owner=u,name=data[0,i],address=fake.address())
#     db.session.add(u)
#     db.session.add(p)
#     db.session.add(c)
#     for job in data[:,i]:
#         db.session.add(models.Job(title=job,company=c))

#     db.session.commit()
Esempio n. 5
0
    def post(self):
        """Queues a new job."""
        # TODO: possibly use marshal() on the post_data. Note that we
        # have moved over to using flask_restx
        # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
        #       to make sure the default values etc. are filled in.
        try:
            post_data = request.get_json()

            scheduled_start = post_data.get("scheduled_start", None)
            cron_schedule = post_data.get("cron_schedule", None)

            if cron_schedule is None and scheduled_start is None:
                # Neither given: to be scheduled ASAP and run once.
                next_scheduled_time = None
            elif cron_schedule is None:
                # Only a start time (expected UTC): run once at that moment.
                next_scheduled_time = datetime.fromisoformat(scheduled_start)
            elif scheduled_start is None:
                # Only a cron expression: recurring job. Compute the next
                # firing time starting from now (UTC).
                if not croniter.is_valid(cron_schedule):
                    raise ValueError(f"Invalid cron schedule: {cron_schedule}")
                next_scheduled_time = croniter(
                    cron_schedule,
                    datetime.now(timezone.utc)).get_next(datetime)
            else:
                raise ValueError(
                    "Can't define both cron_schedule and scheduled_start.")

            job = {
                "uuid": post_data["uuid"],
                "name": post_data["name"],
                "project_uuid": post_data["project_uuid"],
                "pipeline_uuid": post_data["pipeline_uuid"],
                "pipeline_name": post_data["pipeline_name"],
                "schedule": cron_schedule,
                "parameters": post_data["parameters"],
                "env_variables": get_proj_pip_env_variables(
                    post_data["project_uuid"], post_data["pipeline_uuid"]),
                "pipeline_definition": post_data["pipeline_definition"],
                "pipeline_run_spec": post_data["pipeline_run_spec"],
                "total_scheduled_executions": 0,
                "next_scheduled_time": next_scheduled_time,
                "status": "DRAFT",
                "strategy_json": post_data.get("strategy_json", {}),
                "created_time": datetime.now(timezone.utc),
            }
            db.session.add(models.Job(**job))
            db.session.commit()
        except Exception as e:
            # Top-level boundary: roll back, log, and report the failure.
            db.session.rollback()
            current_app.logger.error(e)
            return {"message": str(e)}, 500

        return marshal(job, schema.job), 201
Esempio n. 6
0
    def updateServiceJobs(self):
        """Ensure the built-in service jobs exist with current frequencies.

        For each of the three service job types, either creates the job
        row (when no rows exist for that type) or refreshes
        run_frequency_seconds on every existing row from app config.
        Commits via self.dbSession.
        """
        from app.dbcommon import DBCommon
        import app.models as models
        from app import app
        dbcommon = DBCommon(self.dbSession)

        def _sync(job_type, config_key):
            # Create-or-update pass for a single service job type. Extracted
            # to remove the three near-identical copy-pasted stanzas the
            # original had (same logic, different type/config key).
            frequency = app.config[config_key]
            existing = dbcommon.getServicesJobs(job_type)
            if not existing:
                # No job of this type scheduled yet: create it.
                job = models.Job(job_type=job_type,
                                 job_user=None,
                                 job_project=None,
                                 job_export_group=None,
                                 job_export_project=None,
                                 run_frequency_seconds=frequency,
                                 job_secrets=None,
                                 job_details="")
                self.dbSession.add(job)
                self.dbSession.commit()
            else:
                # Keep existing rows in sync with the configured frequency.
                for job in existing:
                    job.run_frequency_seconds = frequency
                    self.dbSession.commit()

        _sync("AllExportJobsS3Status",
              "EXPORTS_PROJECT_FILES_S3_TOP_LEVEL_LOG_FREQUENCY_SECONDS")
        _sync("HealthMetricsCollection",
              "HEALTHCHECK_SCHEDULE_FREQUENCY_SECONDS")
        _sync("DatabasePrune", "DATABASE_PRUNE_FREQUENCY_SECONDS")
Esempio n. 7
0
    def create(self, username, projectName, exportGroupName,
               exportProjectName):
        """Create and schedule a ProjectExport job for a Domino project.

        Validates the API key, project access, and Docker-registry name
        compliance of the export group/project names (lower-casing them
        with a warning when that suffices), persists the job, and
        registers it with the scheduler.

        Returns:
            (respCode, jobData) — an HTTP-style status code and a dict
            with "success", "message", "export_id", and
            "export_frequency_seconds" keys.
        """
        respCode = 201
        jobData = {
            "success": None,
            "message": None,
            "export_id": None,
            "export_frequency_seconds": None
        }

        try:
            if not self.dominoAPI.isValidAPIKey():
                # Raising the exception class itself (not an instance);
                # Python instantiates it on raise.
                raise (DominoAPIKeyInvalid)

            jobType = "ProjectExport"
            jobRunFrequencyInSeconds = app.config[
                "EXPORT_JOB_SCHEDULE_DEFAULT_FREQUENCY_SECONDS"]

            # Expect to get Exceptions here if the Domino API Key does not provide access to the Project
            projectInfo = self.dominoAPI.findProjectByOwnerAndName(
                username, projectName)
            if not self.dominoAPI.hasAccessToProject(username, projectName):
                raise (DominoAPIUnauthorized)

            # Check export group and project names for compliance with Docker Registry naming requirements
            if not self.reDockerRegistryName.match(exportGroupName):
                if self.reDockerRegistryName.match(exportGroupName.lower()):
                    # Lower-casing makes the name compliant; accept it but
                    # append a warning to any existing message.
                    exportGroupName = exportGroupName.lower()

                    jobMessageFormat = "{MESSAGE}"
                    if jobData["message"]:
                        jobMessageFormat = "{ORIGINAL};  {MESSAGE}"

                    jobData["message"] = jobMessageFormat.format(
                        ORIGINAL=jobData["message"],
                        MESSAGE=
                        "Warning: request has been processed, but the Export Group Name has been automatically converted to lower case to comply with Docker Registry standards"
                    )
                else:
                    raise (ExportAPIInvalidExportGroupName)

            if not self.reDockerRegistryName.match(exportProjectName):
                if self.reDockerRegistryName.match(exportProjectName.lower()):
                    # Same lower-casing fallback as for the group name above.
                    exportProjectName = exportProjectName.lower()

                    jobMessageFormat = "{MESSAGE}"
                    if jobData["message"]:
                        jobMessageFormat = "{ORIGINAL};  {MESSAGE}"

                    jobData["message"] = jobMessageFormat.format(
                        ORIGINAL=jobData["message"],
                        MESSAGE=
                        "Warning: request has been processed, but the Export Project Name has been automatically converted to lower case to comply with Docker Registry standards"
                    )
                else:
                    raise (ExportAPIInvalidExportProjectName)

            # Expect to get Exceptions here if the job already exists
            self.dbCommon.raiseOnJobExists(
                username, projectName, exportGroupName, exportProjectName,
                app.config.get("ALLOW_SAME_PROJECT_EXPORTS", False))

            # Do the actual work here
            # Initial per-task state; execution/commit ids are filled in by
            # the export tasks as they run.
            jobDetails = {
                "taskState": {
                    "ProjectFilesExportTask": {
                        "lastCompletedExecutionID": None,
                        "commitID": None
                    },
                    "ProjectDockerImageExportTask": {
                        "lastCompletedExecutionID": None,
                        "computeEnvironmentID": None,
                        "computeEnvironmentRevision": None
                    },
                    "ProjectExportReportToS3Task": {
                        "lastCompletedExecutionID": None,
                        "statusSaved": False
                    }
                },
                "dockerBuildTemplateFile": "Standard.Dockerfile"
            }
            # Secrets and details are stored encrypted at rest.
            job = models.Job(job_type=jobType,
                             job_user=username.lower(),
                             job_project=projectName,
                             job_export_group=exportGroupName,
                             job_export_project=exportProjectName,
                             run_frequency_seconds=jobRunFrequencyInSeconds,
                             job_secrets=encrypter.encrypt(self.dominoAPIKey),
                             job_details=encrypter.encrypt(
                                 json.dumps(jobDetails)))

            self.dbSession.add(job)
            self.dbSession.commit()

            jobData["success"] = True
            jobData["export_id"] = job.export_id
            jobData["export_frequency_seconds"] = job.run_frequency_seconds

            # Schedule job with scheduler
            scheduler.addJob(job.job_id, True)

        except BadRequest:
            respCode = 400
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIMalformedJSON"]
        except DominoAPINotFound:
            respCode = 400
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIProjectNotExist"]
        except (DominoAPIKeyInvalid, DominoAPIUnauthorized):
            respCode = 401
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIProjectNoAccess"]
        except DBExportJobExists:
            respCode = 409
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIExportNameConflict"]
        except DBProjectJobExists:
            respCode = 409
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIDominoNameConflict"]
        except ExportAPIInvalidExportGroupName:
            respCode = 422
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIInvalidExportGroupName"]
        except ExportAPIInvalidExportProjectName:
            respCode = 422
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "ExportAPIInvalidExportProjectName"]
        except (DominoAPIUnexpectedError, Exception) as e:
            # NOTE(review): listing Exception makes DominoAPIUnexpectedError
            # redundant here, and the `raise (e)` below means the 503
            # respCode/jobData prepared in this branch are never actually
            # returned — the exception propagates to the caller instead.
            # Confirm whether the re-raise is intentional.
            respCode = 503
            jobData["success"] = False
            jobData["message"] = StatusTypes.messageFromType[
                "UnknownError"].format(repr(e))
            raise (e)

        return (respCode, jobData)