# Create one HIT row, pick a control sentence for the batch, and store the
# control in a randomly chosen slot of the HIT.
# NOTE(review): the statements were found collapsed onto a single line; the
# nesting below is reconstructed from statement order -- confirm against the
# upstream file.

# 0-based slot that will hold the control sentence (randint is inclusive at
# both ends, hence the -1).
qcnum = random.randint(0, settings["num_unknowns"] + settings["num_knowns"] - 1)

# Register the HIT under a fresh GUID; the first column of the returned row
# is used as the hit id.
guid = str(uuid.uuid4())
sql = "SELECT add_hit(%s, %s, %s, %s, %s, %s, %s);"
cur2.execute(sql, ("", guid, hittype_id, lang_id, 0, 0, 0))
hit_id = cur2.fetchone()[0]
if hit_id not in sentcounts:
    sentcounts.append(hit_id)
logging.info("Batch added")

# Collect the text and the id of every sentence in the batch.
sents = []
sentids = []
idsql = 'SELECT sentence from esl_sentences where doc_id=%s;'
for item in batchiter:
    doc_id = item[4]
    # NOTE(review): candidates is overwritten on every pass, so only the
    # value for the last item reaches best_control() below, and it is never
    # assigned when batchiter is empty. Left in place because hoisting the
    # call could change behaviour if pull_candidates has side effects.
    candidates = controls.pull_candidates(doc_id.split('_')[0])
    cur2.execute(idsql, (doc_id,))
    sents.append(cur2.fetchone()[0])
    sentids.append(doc_id)

# Pick the control for these sentences and persist it before it is referenced
# from esl_hits_data.
b = controls.best_control(sents, candidates, dfs)
# Single-argument print(...) produces the same output as the Python 2 print
# statement this script uses elsewhere.
print(b)
cid = controls.insert_into_db("CONTROL " + b, cur2)
conn.commit()

for i, sent_id in enumerate(sentids):
    print((i, sent_id))
    if i == qcnum:
        # Only the control slot is written here.
        sql = "INSERT INTO esl_hits_data (hit_id, esl_sentence_id, language_id, sentence_num) VALUES (%s,%s,%s,%s);"
        cur2.execute(sql, (hit_id, cid, lang_id, i))
#group senteces into groups of 5 for HITs sents = [] sentids = [] docids = [] idsforhit = [0]*(settings["num_unknowns"] + settings["num_knowns"]) for item in batchiter: doc_id = item[4] docids.append(doc_id.split('_')[0]) idsql = 'SELECT sentence from esl_sentences where doc_id=%s;' cur2.execute(idsql, (doc_id,)) sents.append(cur2.fetchone()[0]) sentids.append(doc_id) if(args.reload): doc = list(docids)[0] print "Running control query for doc", doc_id candidates = controls.pull_candidates(doc) #choose the control sentence that fits best with the 5 real sentences b = controls.best_control(sents, candidates, dfs) bb = b[0][0] print bb logfile.write(bb+'\n') newb = generrors.randerr(bb) cid = controls.insert_into_db(hit_id, newb, bb, cur2, qcnum) else: bb = cachesents.pop() newb = generrors.randerr(bb) cid = controls.insert_into_db(hit_id, newb, bb, cur2, qcnum) if(cid == -1): print "Error inserting control sentence to DB" break;