# Now that we have primary keys for each SNP, read in loci data.
# Each usable row of the tab-separated loci file has exactly four fields:
# the SNP key used to index `documents`, then mrna_acc, gene and class.
with open(curLociFilePath, 'r') as lociFile:
    lociReader = csv.reader(lociFile, delimiter='\t')
    for row in lociReader:
        # Skip malformed rows and rows for SNPs we have no document for.
        if len(row) != 4 or row[0] not in documents:
            continue

        snpKey, mrnaAcc, geneName, locusClass = row
        locus = {"mrna_acc": mrnaAcc, "gene": geneName, "class": locusClass}

        # Load loci in Mongo documents: the first locus for a SNP creates the
        # list, later ones are appended to it.
        curDoc = documents[snpKey]
        if curDoc["loci"] is None:
            curDoc["loci"] = [locus]
        else:
            curDoc["loci"].append(locus)
        # Written back explicitly, as in the original.
        # NOTE(review): a no-op if `documents` is a plain dict - confirm.
        documents[snpKey] = curDoc

# Data for reporting
result.lociLoadEnd = time.time()
result.totalDocuments = len(documents)

print("Starting to insert " + str(result.totalDocuments) + " documents")

# Log start time for MongoDB inserts
result.documentInsertStart = time.time()

if bulk:
    # Hand every document to the collection in a single insert call.
    print("Bulk insertion starting")
    mongoCollection.insert(documents.values())
elif mongoimport:
    # Dump the documents to a per-chromosome JSON file for mongoimport.
    mimpfile = "jsonchr" + str(curChr) + ".json"
    print("Writing json file for mongoimport")
    # NOTE(review): fp is not closed within this view - confirm it is closed
    # by the code that follows.
    fp = open(mimpfile, 'w')
    for curDoc in documents.values():
        json.dump(curDoc, fp)