import requests
from bs4 import BeautifulSoup
from sqlalchemy import exc

# `models` and the `db` session are provided by the surrounding application.


def get_indeed_jobs(limit):
    """Scrape Indeed search results, 10 postings per page, up to `limit`."""
    url = "https://www.indeed.com/jobs?q=software+engineer+intern&l=San+Ramon%2C+CA"
    start = 0
    excluded = 0
    while start < limit:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        res = soup.find(id='resultsCol')
        jobs = res.find_all('div', class_='result')
        for job in jobs:
            ijob_id = job.get('data-jk')
            # Link to the posting itself; appending the id to the search
            # URL (as the original code did) produces a malformed address.
            job_url = "https://www.indeed.com/viewjob?jk=" + ijob_id
            title = job.find('a', class_="jobtitle").text.replace("\n", "")
            try:
                location = job.find('span', class_="location").text
            except AttributeError:
                location = ""
            try:
                company = job.find('a', {
                    "data-tn-element": "companyName"
                }).text.replace("\n", "")
            except AttributeError:
                company = job.find('span',
                                   class_="company").text.replace("\n", "")
            db_job = models.Job(job_id=ijob_id,
                                title=title,
                                company=company,
                                location=location,
                                url=job_url,
                                source="Indeed")
            db.add(db_job)
            try:
                db.commit()
            except exc.IntegrityError:
                # Duplicate job_id: roll back and skip this posting.
                print(ijob_id)
                excluded += 1
                db.rollback()
                continue
        url = url_incr(url, start)  # advance to the next results page
        start += 10
    print(excluded, "duplicates skipped")
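# The scrapers here assume a `models.Job` table whose job_id column is
# unique, so a duplicate insert surfaces as IntegrityError, plus a
# `url_incr` pagination helper. Neither is defined in this file; the
# definitions below are a minimal sketch inferred from how they are
# used, not the project's actual code.
from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Job(Base):
    __tablename__ = "jobs"

    id = Column(Integer, primary_key=True)
    job_id = Column(String, unique=True, nullable=False)  # site-specific id
    title = Column(String)
    company = Column(String)
    location = Column(String)
    url = Column(String)
    source = Column(String)  # "Indeed" or "Monster"


def url_incr(url, start):
    # Hypothetical: Indeed paginates with a `start` query parameter in
    # steps of 10, so append (or replace) it on the search URL.
    base = url.split("&start=")[0]
    return base + "&start=" + str(start + 10)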
def get_monster_jobs(end):
    """Scrape Monster search results up to results page `end`."""
    url = ("https://www.monster.com/jobs/search/"
           "?q=software-engineer-intern&where=94583&stpage=1&page=" + str(end))
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    res = soup.find(id="ResultsContainer")
    jobs = res.find_all('section', class_="card-content")
    for job in jobs:
        try:
            mjob_id = job.get('data-jobid')
            job_url = url + '&jobid=' + str(mjob_id)
            title = job.find('h2',
                             class_='title').find('a').text.replace('\n', "")
            company = job.find('div', class_="company").find(
                'span', class_="name").text.replace("\n", "")
            location = job.find('div', class_="location").find(
                'span', class_="name").text.replace("\n", "")
            db_job = models.Job(job_id=mjob_id,
                                title=title,
                                company=company,
                                location=location,
                                url=job_url,
                                source="Monster")
            db.add(db_job)
            try:
                db.commit()
            except exc.IntegrityError:
                # Duplicate job_id: roll back and skip this posting.
                db.rollback()
                continue
        except AttributeError:
            # Ad cards and other non-job sections lack these tags; skip them.
            continue
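# Example driver for the two scrapers, assuming `db` is a bound
# SQLAlchemy session. Both sites change their markup frequently, which
# is why missing tags (AttributeError) are treated as skippable above.
if __name__ == "__main__":
    get_indeed_jobs(limit=50)  # first five Indeed result pages
    get_monster_jobs(end=2)    # Monster results page 2
    print(db.query(models.Job).count(), "jobs stored")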
def _transaction(
    self,
    job: Dict[str, Any],
    pipeline_run_spec: Dict[str, Any],
    pipeline_definitions: List[Dict[str, Any]],
    pipeline_run_ids: List[str],
):
    db.session.add(models.Job(**job))

    # So that the job can be returned with all runs.
    job["pipeline_runs"] = []

    # To be used later by the collateral effect function.
    tasks_to_launch = []

    for pipeline_definition, id_ in zip(pipeline_definitions,
                                        pipeline_run_ids):
        # Note: the pipeline definition contains the parameters of
        # the specific run.
        pipeline_run_spec["pipeline_definition"] = pipeline_definition
        pipeline = construct_pipeline(**pipeline_run_spec)

        # Specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db.
        task_id = str(uuid.uuid4())
        tasks_to_launch.append((task_id, pipeline))

        non_interactive_run = {
            "job_uuid": job["job_uuid"],
            "run_uuid": task_id,
            "pipeline_run_id": id_,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": job["project_uuid"],
            "status": "PENDING",
        }
        db.session.add(models.NonInteractivePipelineRun(**non_interactive_run))
        # Need to flush because otherwise the bulk insertion of
        # pipeline steps will lead to foreign key errors.
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # TODO: this code is also in `namespace_runs`. Could
        # potentially be put in a function for modularity.
        # Set an initial value for the status of the pipeline
        # steps that will be run.
        step_uuids = [s.properties["uuid"] for s in pipeline.steps]
        pipeline_steps = []
        for step_uuid in step_uuids:
            pipeline_steps.append(
                models.PipelineRunStep(**{
                    "run_uuid": task_id,
                    "step_uuid": step_uuid,
                    "status": "PENDING",
                }))
        db.session.bulk_save_objects(pipeline_steps)

        non_interactive_run["pipeline_steps"] = pipeline_steps
        job["pipeline_runs"].append(non_interactive_run)

    self.collateral_kwargs["job"] = job
    self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
    self.collateral_kwargs["pipeline_run_spec"] = pipeline_run_spec
    return job
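# _transaction only records state; the actual work happens in a second,
# "collateral" phase once the transaction has committed. The sketch
# below shows the assumed shape of that companion method, driven by the
# collateral_kwargs set above; `launch_pipeline_task` is a hypothetical
# stand-in for the real task dispatcher (e.g. a Celery send_task call).
def _collateral(
    self,
    job: Dict[str, Any],
    tasks_to_launch,
    pipeline_run_spec: Dict[str, Any],
):
    for task_id, pipeline in tasks_to_launch:
        # Each run was committed as PENDING in the transaction phase,
        # so the task cannot outrun its own database record.
        launch_pipeline_task(task_id, pipeline, pipeline_run_spec)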
# Seed one company per column of the `data` frame: a fake owner, their
# profile, the company, and every non-null job title in that column.
name = fake.name().split()
u = models.User(first_name=name[0],
                last_name=name[1],
                email=fake.email(),
                password=bcrypt.hashpw('heythere'.encode('UTF-8'),
                                       bcrypt.gensalt(14)))
p = models.Profile(user=u)
c = models.Company(owner=u, name=comName, address=fake.address())
db.session.add(u)
db.session.add(p)
db.session.add(c)
db.session.commit()
for job in data[comName]:
    if pd.isnull(job):
        break  # columns are padded with NaN past the last job title
    db.session.add(models.Job(title=job, company=c))
db.session.commit()
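# Sanity check for the seeded credentials: bcrypt.checkpw hashes the
# candidate password with the salt embedded in the stored hash and
# compares the results in constant time.
assert bcrypt.checkpw('heythere'.encode('UTF-8'), u.password)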
def post(self):
    """Queues a new job."""
    # TODO: possibly use marshal() on the post_data. Note that we
    # have moved over to using flask_restx
    # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
    # to make sure the default values etc. are filled in.
    try:
        post_data = request.get_json()

        scheduled_start = post_data.get("scheduled_start", None)
        cron_schedule = post_data.get("cron_schedule", None)

        # To be scheduled ASAP and run once.
        if cron_schedule is None and scheduled_start is None:
            next_scheduled_time = None

        # To be scheduled according to the argument and run once.
        elif cron_schedule is None:
            # Expected to be UTC.
            next_scheduled_time = datetime.fromisoformat(scheduled_start)

        # To follow a cron schedule and run an indefinite number of times.
        elif cron_schedule is not None and scheduled_start is None:
            if not croniter.is_valid(cron_schedule):
                raise ValueError(f"Invalid cron schedule: {cron_schedule}")

            # Check when the job should next be scheduled, starting from now.
            next_scheduled_time = croniter(
                cron_schedule, datetime.now(timezone.utc)).get_next(datetime)

        else:
            raise ValueError(
                "Can't define both cron_schedule and scheduled_start.")

        job = {
            "uuid": post_data["uuid"],
            "name": post_data["name"],
            "project_uuid": post_data["project_uuid"],
            "pipeline_uuid": post_data["pipeline_uuid"],
            "pipeline_name": post_data["pipeline_name"],
            "schedule": cron_schedule,
            "parameters": post_data["parameters"],
            "env_variables": get_proj_pip_env_variables(
                post_data["project_uuid"], post_data["pipeline_uuid"]),
            "pipeline_definition": post_data["pipeline_definition"],
            "pipeline_run_spec": post_data["pipeline_run_spec"],
            "total_scheduled_executions": 0,
            "next_scheduled_time": next_scheduled_time,
            "status": "DRAFT",
            "strategy_json": post_data.get("strategy_json", {}),
            "created_time": datetime.now(timezone.utc),
        }
        db.session.add(models.Job(**job))
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        current_app.logger.error(e)
        return {"message": str(e)}, 500

    return marshal(job, schema.job), 201
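# The cron branch above in isolation: croniter validates the expression
# and computes the next UTC fire time from "now".
from datetime import datetime, timezone
from croniter import croniter

expr = "*/15 * * * *"  # every 15 minutes
assert croniter.is_valid(expr)
print(croniter(expr, datetime.now(timezone.utc)).get_next(datetime))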
def updateServiceJobs(self):
    from app.dbcommon import DBCommon
    import app.models as models
    from app import app

    dbcommon = DBCommon(self.dbSession)

    # Each built-in service job is keyed by its type and the config
    # entry that controls how often it runs.
    serviceJobs = [
        ("AllExportJobsS3Status",
         "EXPORTS_PROJECT_FILES_S3_TOP_LEVEL_LOG_FREQUENCY_SECONDS"),
        ("HealthMetricsCollection",
         "HEALTHCHECK_SCHEDULE_FREQUENCY_SECONDS"),
        ("DatabasePrune", "DATABASE_PRUNE_FREQUENCY_SECONDS"),
    ]
    for jobType, configKey in serviceJobs:
        jobs = dbcommon.getServicesJobs(jobType)
        if not jobs:
            # Not scheduled yet: create the service job with its
            # configured default frequency.
            job = models.Job(job_type=jobType,
                             job_user=None,
                             job_project=None,
                             job_export_group=None,
                             job_export_project=None,
                             run_frequency_seconds=app.config[configKey],
                             job_secrets=None,
                             job_details="")
            self.dbSession.add(job)
        else:
            # Already scheduled: refresh the frequency from the current
            # configuration.
            for job in jobs:
                job.run_frequency_seconds = app.config[configKey]
        self.dbSession.commit()
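# getServicesJobs is defined elsewhere (in DBCommon); a plausible
# minimal implementation, guessed from the Job fields used above, is a
# simple filter on job_type:
def getServicesJobs(self, job_type):
    return self.dbSession.query(models.Job).filter(
        models.Job.job_type == job_type).all()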
def create(self, username, projectName, exportGroupName, exportProjectName):
    respCode = 201
    jobData = {
        "success": None,
        "message": None,
        "export_id": None,
        "export_frequency_seconds": None
    }
    try:
        if not self.dominoAPI.isValidAPIKey():
            raise DominoAPIKeyInvalid

        jobType = "ProjectExport"
        jobRunFrequencyInSeconds = app.config[
            "EXPORT_JOB_SCHEDULE_DEFAULT_FREQUENCY_SECONDS"]

        # Expect to get Exceptions here if the Domino API Key does not
        # provide access to the Project.
        projectInfo = self.dominoAPI.findProjectByOwnerAndName(
            username, projectName)
        if not self.dominoAPI.hasAccessToProject(username, projectName):
            raise DominoAPIUnauthorized

        # Check export group and project names for compliance with
        # Docker Registry naming requirements.
        if not self.reDockerRegistryName.match(exportGroupName):
            if self.reDockerRegistryName.match(exportGroupName.lower()):
                exportGroupName = exportGroupName.lower()
                jobMessageFormat = "{MESSAGE}"
                if jobData["message"]:
                    jobMessageFormat = "{ORIGINAL}; {MESSAGE}"
                jobData["message"] = jobMessageFormat.format(
                    ORIGINAL=jobData["message"],
                    MESSAGE="Warning: request has been processed, but the "
                    "Export Group Name has been automatically converted to "
                    "lower case to comply with Docker Registry standards")
            else:
                raise ExportAPIInvalidExportGroupName
        if not self.reDockerRegistryName.match(exportProjectName):
            if self.reDockerRegistryName.match(exportProjectName.lower()):
                exportProjectName = exportProjectName.lower()
                jobMessageFormat = "{MESSAGE}"
                if jobData["message"]:
                    jobMessageFormat = "{ORIGINAL}; {MESSAGE}"
                jobData["message"] = jobMessageFormat.format(
                    ORIGINAL=jobData["message"],
                    MESSAGE="Warning: request has been processed, but the "
                    "Export Project Name has been automatically converted to "
                    "lower case to comply with Docker Registry standards")
            else:
                raise ExportAPIInvalidExportProjectName

        # Expect to get Exceptions here if the job already exists.
        self.dbCommon.raiseOnJobExists(
            username, projectName, exportGroupName, exportProjectName,
            app.config.get("ALLOW_SAME_PROJECT_EXPORTS", False))

        # Do the actual work here.
        jobDetails = {
            "taskState": {
                "ProjectFilesExportTask": {
                    "lastCompletedExecutionID": None,
                    "commitID": None
                },
                "ProjectDockerImageExportTask": {
                    "lastCompletedExecutionID": None,
                    "computeEnvironmentID": None,
                    "computeEnvironmentRevision": None
                },
                "ProjectExportReportToS3Task": {
                    "lastCompletedExecutionID": None,
                    "statusSaved": False
                }
            },
            "dockerBuildTemplateFile": "Standard.Dockerfile"
        }
        job = models.Job(job_type=jobType,
                         job_user=username.lower(),
                         job_project=projectName,
                         job_export_group=exportGroupName,
                         job_export_project=exportProjectName,
                         run_frequency_seconds=jobRunFrequencyInSeconds,
                         job_secrets=encrypter.encrypt(self.dominoAPIKey),
                         job_details=encrypter.encrypt(json.dumps(jobDetails)))
        self.dbSession.add(job)
        self.dbSession.commit()

        jobData["success"] = True
        jobData["export_id"] = job.export_id
        jobData["export_frequency_seconds"] = job.run_frequency_seconds

        # Schedule job with scheduler.
        scheduler.addJob(job.job_id, True)
    except BadRequest:
        respCode = 400
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIMalformedJSON"]
    except DominoAPINotFound:
        respCode = 400
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIProjectNotExist"]
    except (DominoAPIKeyInvalid, DominoAPIUnauthorized):
        respCode = 401
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIProjectNoAccess"]
    except DBExportJobExists:
        respCode = 409
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIExportNameConflict"]
    except DBProjectJobExists:
        respCode = 409
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIDominoNameConflict"]
    except ExportAPIInvalidExportGroupName:
        respCode = 422
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIInvalidExportGroupName"]
    except ExportAPIInvalidExportProjectName:
        respCode = 422
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "ExportAPIInvalidExportProjectName"]
    except (DominoAPIUnexpectedError, Exception) as e:
        respCode = 503
        jobData["success"] = False
        jobData["message"] = StatusTypes.messageFromType[
            "UnknownError"].format(repr(e))
        raise e  # re-raised, so this branch never returns normally

    return (respCode, jobData)
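# Illustrative only: one way a Flask route could surface the
# (respCode, jobData) pair that create() returns. The endpoint path,
# request fields, and the `exports` instance are assumptions, not the
# service's actual API.
from flask import jsonify, request


@app.route("/exports", methods=["POST"])
def create_export():
    body = request.get_json()
    respCode, jobData = exports.create(body["username"],
                                       body["projectName"],
                                       body["exportGroupName"],
                                       body["exportProjectName"])
    return jsonify(jobData), respCode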