Ejemplo n.º 1
0
def filterData2(cols=[
	'HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
	'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
	'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
	'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',
	'AssignmentId', 'WorkerId', 'AssignmentStatus',
	'AcceptTime', 'SubmitTime', 'AutoApprovalTime',
	'ApprovalTime', 'RejectionTime', 'RequesterFeedback',
	'WorkTimeInSeconds', 'LifetimeApprovalRate',
	'Last30DaysApprovalRate', 'Last7DaysApprovalRate',
	'Input.pv_id', 'Input.global_user_id', 'Input.time',
	'Input.declaration', 'Answer.Q1', 'Approve', 'Reject',
]):
	"""Write a column-filtered copy of the original data to ``qbGbl.filFileName``.

	The files found by ``qbPre.listFiles(qbGbl.oriFileName)`` are loaded with
	``qbPre.readFiles``, restricted to ``cols`` and saved as a UTF-8 CSV
	without the index column.

	Args:
		cols: Column labels to keep, in output order. The default list is
			only read here, never modified.
			NOTE(review): the default names look like an Amazon Mechanical
			Turk batch-results export -- confirm against the input files.

	Returns:
		None. The only effect is the CSV written to ``qbGbl.filFileName``.
	"""
	source_paths = qbPre.listFiles(qbGbl.oriFileName)
	selected = qbPre.readFiles(source_paths)[cols]
	selected.to_csv(qbGbl.filFileName, index=False, encoding='utf-8')
Ejemplo n.º 2
0
def getReliableData(cols=[
    'HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
    'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
    'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds', 'Expiration',
    'NumberOfSimilarHITs', 'LifetimeInSeconds', 'AssignmentId', 'WorkerId',
    'AssignmentStatus', 'AcceptTime', 'SubmitTime', 'AutoApprovalTime',
    'ApprovalTime', 'RejectionTime', 'RequesterFeedback', 'WorkTimeInSeconds',
    'LifetimeApprovalRate', 'Last30DaysApprovalRate', 'Last7DaysApprovalRate',
    'Input.pv_id', 'Input.global_user_id', 'Input.time', 'Input.declaration',
    'Answer.Q1', 'Approve', 'Reject'
]):
    """Combine the vetted worker data with the remaining observations and save it.

    Steps, in order:

    1. Take the frame returned by ``qbRel.analyseWorkers()`` and drop the rows
       flagged by ``qbRel.pickBadEntries``. The ``Input.declaration`` values
       of what is left form the "silver set".
    2. Load the files listed by ``qbPre.listFiles(qbGbl.oriFileName)``, keep
       ``cols`` and renumber the rows 0..n-1.
    3. Remove every loaded row whose declaration is already in the silver set,
       then the rows flagged by ``qbRel.pickBadObs``.
    4. Stack both parts, keep ``WorkerId``, ``Input.declaration`` and
       ``Answer.Q1``, renumber, and write to ``qbGbl.finalReaderFile``.

    Args:
        cols: Column labels to keep from the loaded files. Must include
            ``Input.declaration``. The default list is only read here.

    Returns:
        None. The result is written as a header-less CSV whose first column
        is the 0-based row number.
    """
    keptColumns = ['WorkerId', 'Input.declaration', 'Answer.Q1']

    dataSet = qbRel.analyseWorkers()
    badEntries = qbRel.pickBadEntries(dataSet)
    dataSet = dataSet.drop(badEntries.index)

    dataSet = dataSet[keptColumns]
    silverSet = dataSet['Input.declaration']

    # load the full data set
    paths = qbPre.listFiles(qbGbl.oriFileName)
    filData = qbPre.readFiles(paths)[cols]

    # Unique 0..n-1 labels so the label-based drops below remove exactly the
    # intended rows (works on Python 2 and 3, unlike ``xrange``).
    filData = filData.reset_index(drop=True)

    # Remove the observations that were verified earlier as the silver set.
    # One vectorised membership test replaces a full rescan per declaration.
    # Missing values are left out of the lookup because ``==`` never matched
    # them, whereas ``isin`` would.
    inSilverSet = filData['Input.declaration'].isin(silverSet.dropna())
    filData = filData.drop(filData[inSilverSet].index)

    badEntries = qbRel.pickBadObs(filData)
    filData = filData.drop(badEntries.index)

    # ``DataFrame.append`` no longer exists in pandas >= 2.0; ``concat`` is
    # the equivalent that works on every pandas version.
    dataSet = pd.concat([dataSet, filData])
    dataSet = dataSet[keptColumns]
    dataSet = dataSet.reset_index(drop=True)

    dataSet.to_csv(qbGbl.finalReaderFile, header=False)
Ejemplo n.º 3
0
def getReliableData(cols=['HITId','HITTypeId','Title','Description','Keywords','Reward',
	'CreationTime','MaxAssignments','RequesterAnnotation','AssignmentDurationInSeconds',
	'AutoApprovalDelayInSeconds','Expiration','NumberOfSimilarHITs','LifetimeInSeconds',
	'AssignmentId','WorkerId','AssignmentStatus','AcceptTime','SubmitTime','AutoApprovalTime',
	'ApprovalTime','RejectionTime','RequesterFeedback','WorkTimeInSeconds','LifetimeApprovalRate',
	'Last30DaysApprovalRate','Last7DaysApprovalRate','Input.pv_id','Input.global_user_id',
	'Input.time','Input.declaration','Answer.Q1','Approve','Reject']):
	"""Merge the vetted worker data with the remaining observations and save it.

	The frame from ``qbRel.analyseWorkers()`` is cleaned with
	``qbRel.pickBadEntries``; its ``Input.declaration`` values form the
	"silver set". The original files (``qbGbl.oriFileName``) are then loaded,
	reduced to ``cols``, stripped of rows whose declaration is already in the
	silver set and of rows flagged by ``qbRel.pickBadObs``. Both parts are
	stacked, reduced to ``WorkerId``, ``Input.declaration`` and ``Answer.Q1``,
	renumbered from 0 and written to ``qbGbl.finalReaderFile``.

	Args:
		cols: Column labels to keep from the loaded files. Must include
			``Input.declaration``. The default list is only read here.

	Returns:
		None. The output is a header-less CSV whose first column is the
		0-based row number.
	"""
	keptColumns = ['WorkerId', 'Input.declaration', 'Answer.Q1']

	dataSet = qbRel.analyseWorkers()
	badEntries = qbRel.pickBadEntries(dataSet)
	dataSet = dataSet.drop(badEntries.index)

	dataSet = dataSet[keptColumns]
	silverSet = dataSet['Input.declaration']

	# load the full data set
	paths = qbPre.listFiles(qbGbl.oriFileName)
	filData = qbPre.readFiles(paths)[cols]

	# Unique 0..n-1 labels keep the label-based drops below precise; this
	# replaces the Python-2-only ``xrange`` assignment.
	filData = filData.reset_index(drop=True)

	# Remove the observations that were verified earlier as the silver set.
	# A single ``isin`` mask replaces one full scan per declaration. Missing
	# values are excluded from the lookup because the former ``==`` test
	# never matched them, while ``isin`` would.
	inSilverSet = filData['Input.declaration'].isin(silverSet.dropna())
	filData = filData.drop(filData[inSilverSet].index)

	badEntries = qbRel.pickBadObs(filData)
	filData = filData.drop(badEntries.index)

	# ``DataFrame.append`` was removed in pandas 2.0; ``concat`` behaves the
	# same and is available in every pandas version.
	dataSet = pd.concat([dataSet, filData])
	dataSet = dataSet[keptColumns]
	dataSet = dataSet.reset_index(drop=True)

	dataSet.to_csv(qbGbl.finalReaderFile, header=False)
Ejemplo n.º 4
0
def filterData2(cols=[
    'HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
    'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
    'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
    'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',
    'AssignmentId', 'WorkerId', 'AssignmentStatus',
    'AcceptTime', 'SubmitTime', 'AutoApprovalTime',
    'ApprovalTime', 'RejectionTime', 'RequesterFeedback',
    'WorkTimeInSeconds', 'LifetimeApprovalRate',
    'Last30DaysApprovalRate', 'Last7DaysApprovalRate',
    'Input.pv_id', 'Input.global_user_id', 'Input.time',
    'Input.declaration', 'Answer.Q1', 'Approve', 'Reject',
]):
    """Save the original data, reduced to ``cols``, as ``qbGbl.filFileName``.

    Loads whatever ``qbPre.readFiles`` returns for the paths listed by
    ``qbPre.listFiles(qbGbl.oriFileName)``, selects ``cols`` and writes the
    selection as a UTF-8 CSV with no index column.

    Args:
        cols: Column labels to keep, in output order. The default list is
            only read here, never modified.
            NOTE(review): the default names resemble an Amazon Mechanical
            Turk batch-results export -- confirm against the input files.

    Returns:
        None. The only effect is the CSV written to ``qbGbl.filFileName``.
    """
    frame = qbPre.readFiles(qbPre.listFiles(qbGbl.oriFileName))
    frame[cols].to_csv(qbGbl.filFileName, index=False, encoding='utf-8')